From f27500cd05567bd24af7b827cf1a7d2d3f07ea7e Mon Sep 17 00:00:00 2001
From: manascripts <manas.sehgal@dkfz.de>
Date: Thu, 25 Sep 2025 17:55:05 +0200
Subject: [PATCH 01/11] update schema_input.json

---
 assets/schema_input.json | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/assets/schema_input.json b/assets/schema_input.json
index 7fedd10..e81512d 100755
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -7,27 +7,30 @@
     "items": {
         "type": "object",
         "properties": {
-            "sample": {
+            "sample_id": {
                 "type": "string",
                 "pattern": "^\\S+$",
-                "errorMessage": "Sample name must be provided and cannot contain spaces",
+                "errorMessage": "Sample ID must be provided and cannot contain spaces",
                 "meta": ["id"]
             },
-            "fastq_1": {
+            "bam_path": {
                 "type": "string",
                 "format": "file-path",
                 "exists": true,
-                "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$",
-                "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+                "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.bam$",
+                "errorMessage": "BAM file path must be provided, cannot contain spaces and must have extension '.bam'"
             },
-            "fastq_2": {
+            "sample_type": {
                 "type": "string",
-                "format": "file-path",
-                "exists": true,
-                "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$",
-                "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+                "enum": ["case", "control"],
+                "errorMessage": "Sample type must be either 'case' or 'control'"
+            },
+            "match_id": {
+                "type": "string",
+                "pattern": "^\\S+$",
+                "errorMessage": "Match ID must be provided and cannot contain spaces"
             }
         },
-        "required": ["sample", "fastq_1"]
+        "required": ["sample_id", "bam_path", "sample_type", "match_id"]
     }
-}
+}
\ No newline at end of file

From 93b9addb2dc8038dd0504be14bdad292f5a896af Mon Sep 17 00:00:00 2001
From: manascripts <manas.sehgal@dkfz.de>
Date: Thu, 25 Sep 2025 18:20:42 +0200
Subject: [PATCH 02/11] remove time limits in base.config + update
 subworkflows/local/utils_nfcore_ontvar_pipeline/main.nf

---
 conf/base.config                              |  8 +++----
 .../utils_nfcore_ontvar_pipeline/main.nf      | 24 +++++--------------
 2 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 62162c3..6d96e3b 100755
--- a/conf/base.config
+++ b/conf/base.config
@@ -29,22 +29,22 @@ process {
     withLabel:process_single {
         cpus   = { 1                   }
         memory = { 6.GB * task.attempt }
-        time   = { 4.h  * task.attempt }
+        // time   = { 4.h  * task.attempt }
     }
     withLabel:process_low {
         cpus   = { 2     * task.attempt }
         memory = { 12.GB * task.attempt }
-        time   = { 4.h   * task.attempt }
+        // time   = { 4.h   * task.attempt }
     }
     withLabel:process_medium {
         cpus   = { 6     * task.attempt }
         memory = { 36.GB * task.attempt }
-        time   = { 8.h   * task.attempt }
+        // time   = { 8.h   * task.attempt }
     }
     withLabel:process_high {
         cpus   = { 12    * task.attempt }
         memory = { 72.GB * task.attempt }
-        time   = { 16.h  * task.attempt }
+        // time   = { 16.h  * task.attempt }
     }
     withLabel:process_long {
         time   = { 20.h  * task.attempt }
diff --git a/subworkflows/local/utils_nfcore_ontvar_pipeline/main.nf b/subworkflows/local/utils_nfcore_ontvar_pipeline/main.nf
index 54a5028..00c7554 100755
--- a/subworkflows/local/utils_nfcore_ontvar_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_ontvar_pipeline/main.nf
@@ -73,24 +73,12 @@ workflow PIPELINE_INITIALISATION {
     //
 
     Channel
-        .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json"))
-        .map {
-            meta, fastq_1, fastq_2 ->
-                if (!fastq_2) {
-                    return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
-                } else {
-                    return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
-                }
-        }
-        .groupTuple()
-        .map { samplesheet ->
-            validateInputSamplesheet(samplesheet)
-        }
-        .map {
-            meta, fastqs ->
-                return [ meta, fastqs.flatten() ]
-        }
-        .set { ch_samplesheet }
+    .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json"))
+    .map {
+        meta, bam_path, sample_type, match_id ->
+            return [meta.id, bam_path, sample_type, match_id]
+    }
+    .set { ch_samplesheet }
 
     emit:
     samplesheet = ch_samplesheet

From cd507aa2df0a6737786c716df7434c45f3494362 Mon Sep 17 00:00:00 2001
From: manascripts <manas.sehgal@dkfz.de>
Date: Fri, 26 Sep 2025 10:09:39 +0200
Subject: [PATCH 03/11] modify samplesheet structure and output file  names

---
 assets/schema_input.json                      | 22 ++++++-------
 conf/modules.config                           |  8 ++---
 .../utils_nfcore_ontvar_pipeline/main.nf      |  5 +--
 workflows/ontvar.nf                           | 32 ++++++++++---------
 4 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/assets/schema_input.json b/assets/schema_input.json
index e81512d..6ab045e 100755
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -7,30 +7,30 @@
     "items": {
         "type": "object",
         "properties": {
-            "sample_id": {
+            "group_id": {
                 "type": "string",
                 "pattern": "^\\S+$",
-                "errorMessage": "Sample ID must be provided and cannot contain spaces",
+                "errorMessage": "Group ID must be provided and cannot contain spaces",
                 "meta": ["id"]
             },
-            "bam_path": {
+            "sample_id": {
                 "type": "string",
-                "format": "file-path",
-                "exists": true,
-                "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.bam$",
-                "errorMessage": "BAM file path must be provided, cannot contain spaces and must have extension '.bam'"
+                "pattern": "^\\S+$",
+                "errorMessage": "Sample ID must be provided and cannot contain spaces"
             },
             "sample_type": {
                 "type": "string",
                 "enum": ["case", "control"],
                 "errorMessage": "Sample type must be either 'case' or 'control'"
             },
-            "match_id": {
+            "bam_path": {
                 "type": "string",
-                "pattern": "^\\S+$",
-                "errorMessage": "Match ID must be provided and cannot contain spaces"
+                "format": "file-path",
+                "exists": true,
+                "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.bam$",
+                "errorMessage": "BAM file path must be provided, cannot contain spaces and must have extension '.bam'"
             }
         },
-        "required": ["sample_id", "bam_path", "sample_type", "match_id"]
+        "required": ["group_id", "sample_id", "sample_type", "bam_path"]
     }
 }
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
index 2dab2f9..51f1067 100755
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -248,8 +248,8 @@ process {
             pattern: '*.{tsv,vcf}',
             saveAs: { filename -> 
                 def meta = task.ext.meta ?: [:]
-                def sample = meta.sample ?: meta.id ?: "unknown"
-                filename.replaceAll(/^[^.]+/, "${sample}")
+                def group_id = meta.sample ?: meta.id ?: "unknown"
+                filename.replaceAll(/^[^.]+/, "${group_id}")
             }
         ]
     }
@@ -289,8 +289,8 @@ process {
             pattern: '*.{tsv,vcf}',
             saveAs: { filename -> 
                 def meta = task.ext.meta ?: [:]
-                def sample = meta.sample ?: meta.id ?: "unknown"
-                filename.replaceAll(/^[^.]+/, "${sample}")
+                def group_id = meta.sample ?: meta.id ?: "unknown"
+                filename.replaceAll(/^[^.]+/, "${group_id}")
             }
         ]
     }
diff --git a/subworkflows/local/utils_nfcore_ontvar_pipeline/main.nf b/subworkflows/local/utils_nfcore_ontvar_pipeline/main.nf
index 00c7554..f2c8e57 100755
--- a/subworkflows/local/utils_nfcore_ontvar_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_ontvar_pipeline/main.nf
@@ -75,8 +75,9 @@ workflow PIPELINE_INITIALISATION {
     Channel
     .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json"))
     .map {
-        meta, bam_path, sample_type, match_id ->
-            return [meta.id, bam_path, sample_type, match_id]
+        meta, sample_id, sample_type, bam_path ->
+            // Return: [group_id, sample_id, sample_type, bam_path]
+            return [meta.id, sample_id, sample_type, bam_path]
     }
     .set { ch_samplesheet }
 
diff --git a/workflows/ontvar.nf b/workflows/ontvar.nf
index 71095b7..0f4f3a0 100755
--- a/workflows/ontvar.nf
+++ b/workflows/ontvar.nf
@@ -56,22 +56,24 @@ workflow ONTVAR {
     ch_multiqc_files = Channel.empty()
 
     ch_sample_info = ch_samplesheet
+    // ch_sample_info now contains: [group_id, sample_id, sample_type, bam_path]
+    //                               [   0   ,    1    ,     2     ,    3      ]
 
     cases = ch_sample_info
-        .filter { it[2] == 'case' }
+        .filter { it[2] == 'case' }      // sample_type is index 2
     controls = ch_sample_info
-        .filter { it[2] == 'control' }
+        .filter { it[2] == 'control' }   // sample_type is index 2
     controls_by_match = controls
-        .map { c -> tuple(c[3], c[1]) }
+        .map { c -> tuple(c[0], c[3]) }  // [group_id, bam_path]
 
     sv_input = cases
-        .map { c -> tuple(c[3], c[0], c[1]) }  // map to [match_id, sample_id, bam_path]
-        .join(controls_by_match, remainder: true)  // join on match_id
+        .map { c -> tuple(c[0], c[0], c[3]) }  // map to [group_id, group_id, bam_path] - using group_id as sample name
+        .join(controls_by_match, remainder: true)  // join on group_id
         .map { it -> 
-            def sample_id = it[1] 
+            def group_id = it[0]  // Use group_id as sample identifier
             def case_bam = it[2]
             def control_bam = it[3]  // control_bam comes from join
-            tuple(sample_id, case_bam, control_bam ?: null)
+            tuple(group_id, case_bam, control_bam ?: null)
         }
 
     // ──────────────────────────────────────────────────────────────────────
@@ -81,8 +83,8 @@ workflow ONTVAR {
     // SNIFFLES
     sniffles_input = sv_input
         .map { it ->
-            def sample_id = it[0]
-            tuple([id: "${sample_id}_sniffles", sample: sample_id], it[1], file("${it[1]}.bai"))
+            def group_id = it[0]
+            tuple([id: "${group_id}_sniffles", sample: group_id], it[1], file("${it[1]}.bai"))
         }
 
     SNIFFLES(
@@ -96,8 +98,8 @@ workflow ONTVAR {
     // CUTESV
     cutesv_input = sv_input
         .map { it ->
-            def sample_id = it[0]
-            tuple([id: "${sample_id}_cutesv", sample: sample_id], it[1], file("${it[1]}.bai"))
+            def group_id = it[0]
+            tuple([id: "${group_id}_cutesv", sample: group_id], it[1], file("${it[1]}.bai"))
         }
     
     CUTESV(
@@ -108,17 +110,17 @@ workflow ONTVAR {
     // SEVERUS
     severus_with_control_input = sv_input.filter { it[2] }
         .map { it ->
-            def sample_id = it[0]
+            def group_id = it[0]
             def case_bam = it[1]
             def control_bam = it[2]
-            tuple([id: sample_id, sample: sample_id, has_control: true, control_bam: control_bam], case_bam, file("${case_bam}.bai"), control_bam, file("${control_bam}.bai"), [])
+            tuple([id: group_id, sample: group_id, has_control: true, control_bam: control_bam], case_bam, file("${case_bam}.bai"), control_bam, file("${control_bam}.bai"), [])
         }
     
     severus_no_control_input = sv_input.filter { !it[2] }
         .map { it ->
-            def sample_id = it[0]
+            def group_id = it[0]
             def case_bam = it[1]
-            tuple([id: sample_id, sample: sample_id, has_control: false], case_bam, file("${case_bam}.bai"), [], [], [])
+            tuple([id: group_id, sample: group_id, has_control: false], case_bam, file("${case_bam}.bai"), [], [], [])
         }
     
     SEVERUS_WITH_CONTROL(

From e304433a64231dbfaea6993b387e6a9e6c3338d8 Mon Sep 17 00:00:00 2001
From: manascripts <manas.sehgal@dkfz.de>
Date: Fri, 26 Sep 2025 11:04:46 +0200
Subject: [PATCH 04/11] streamline process ids

---
 conf/modules.config                           |  6 +-
 .../local/merge_vcf_headers/environment.yml   |  0
 modules/local/merge_vcf_headers/main.nf       |  0
 modules/local/merge_vcf_headers/meta.yml      |  0
 modules/local/rename_vcf/main.nf              |  2 +-
 workflows/ontvar.nf                           | 68 ++++++++++---------
 6 files changed, 41 insertions(+), 35 deletions(-)
 delete mode 100644 modules/local/merge_vcf_headers/environment.yml
 delete mode 100644 modules/local/merge_vcf_headers/main.nf
 delete mode 100644 modules/local/merge_vcf_headers/meta.yml

diff --git a/conf/modules.config b/conf/modules.config
index 51f1067..dc964dd 100755
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -34,6 +34,7 @@ process {
     withName: 'SNIFFLES' {
         cpus = 3
         memory = 3.GB
+        ext.prefix = { "${meta.sample ?: meta.id}_sniffles" }
         ext.args = {
             def base_args = "--mapq ${params.mapq} --minsvlen ${params.minsvlen} --minsupport ${params.minsupport} --cluster-merge-pos ${params.cluster_merge_pos}"
             def phase_arg = params.phase ? "--phase" : ""
@@ -48,6 +49,7 @@ process {
     withName: 'CUTESV' {
         cpus = 4
         memory = 12.GB
+        ext.prefix = { "${meta.sample ?: meta.id}_cutesv" }
         ext.args = {
             def base_args = "--min_support ${params.cutesv_min_support} --min_size ${params.min_size} --min_mapq ${params.cutesv_min_mapq}"
             base_args += " --max_cluster_bias_INS ${params.max_cluster_bias_INS} --diff_ratio_merging_INS ${params.diff_ratio_merging_INS}"
@@ -64,6 +66,7 @@ process {
     withName: 'SEVERUS_WITH_CONTROL' {
         cpus = 4
         memory = 5.GB
+        ext.prefix = { "${meta.sample ?: meta.id}_tn_severus" }
         ext.args = {
             def base_args = "--min-sv-size ${params.min_sv_size} --min-mapq ${params.severus_min_mapq} --min-support ${params.severus_min_support}"
             def user_args = params.severus_args ? params.severus_args : ""
@@ -77,6 +80,7 @@ process {
     withName: 'SEVERUS_NO_CONTROL' {
         cpus = 4
         memory = 5.GB
+        ext.prefix = { "${meta.sample ?: meta.id}_to_severus" }
         ext.args = {
             def base_args = "--min-sv-size ${params.min_sv_size} --PON \"${params.PON}\" --min-mapq ${params.severus_min_mapq} --min-support ${params.severus_min_support}"
             def user_args = params.severus_args ? params.severus_args : ""
@@ -189,7 +193,7 @@ process {
         ]
     }
 
-    withName: 'BCFTOOLS_SORT_SAMPLE' {
+    withName: 'SORT_VCF' {
         publishDir = [
             enabled: false  // Intermediate step
         ]
diff --git a/modules/local/merge_vcf_headers/environment.yml b/modules/local/merge_vcf_headers/environment.yml
deleted file mode 100644
index e69de29..0000000
diff --git a/modules/local/merge_vcf_headers/main.nf b/modules/local/merge_vcf_headers/main.nf
deleted file mode 100644
index e69de29..0000000
diff --git a/modules/local/merge_vcf_headers/meta.yml b/modules/local/merge_vcf_headers/meta.yml
deleted file mode 100644
index e69de29..0000000
diff --git a/modules/local/rename_vcf/main.nf b/modules/local/rename_vcf/main.nf
index 5a23b5e..d1e7603 100755
--- a/modules/local/rename_vcf/main.nf
+++ b/modules/local/rename_vcf/main.nf
@@ -1,5 +1,5 @@
 process RENAME_VCF {
-    tag "${meta.id}_${suffix}"
+    tag "${meta.id}"
     label 'process_single'
 
     input:
diff --git a/workflows/ontvar.nf b/workflows/ontvar.nf
index 0f4f3a0..2a03dff 100755
--- a/workflows/ontvar.nf
+++ b/workflows/ontvar.nf
@@ -34,7 +34,7 @@ include { SVDB_QUERY as SVDB_QUERY_COHORT } from '../modules/nf-core/svdb/query/
 include { BCFTOOLS_VIEW as CALLER_SUPPORT_FILTER } from '../modules/nf-core/bcftools/view/main'
 include { BCFTOOLS_VIEW as AF_FILTER } from '../modules/nf-core/bcftools/view/main'
 include { BCFTOOLS_VIEW as AF_FILTER_COHORT } from '../modules/nf-core/bcftools/view/main'
-include { BCFTOOLS_SORT as BCFTOOLS_SORT_SAMPLE } from '../modules/nf-core/bcftools/sort/main'
+include { BCFTOOLS_SORT as SORT_VCF } from '../modules/nf-core/bcftools/sort/main'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -84,7 +84,7 @@ workflow ONTVAR {
     sniffles_input = sv_input
         .map { it ->
             def group_id = it[0]
-            tuple([id: "${group_id}_sniffles", sample: group_id], it[1], file("${it[1]}.bai"))
+            tuple([id: "${group_id}", sample: group_id], it[1], file("${it[1]}.bai"))
         }
 
     SNIFFLES(
@@ -99,7 +99,7 @@ workflow ONTVAR {
     cutesv_input = sv_input
         .map { it ->
             def group_id = it[0]
-            tuple([id: "${group_id}_cutesv", sample: group_id], it[1], file("${it[1]}.bai"))
+            tuple([id: "${group_id}", sample: group_id], it[1], file("${it[1]}.bai"))
         }
     
     CUTESV(
@@ -159,8 +159,8 @@ workflow ONTVAR {
     SUMMARIZE_CALLERS(
         all_caller_vcfs
             .collect { meta, vcf -> vcf }
-            .map { vcf_list -> tuple([id: "01_raw_calls"], vcf_list) },
-        Channel.value("01_raw_calls")
+            .map { vcf_list -> tuple([id: "raw_caller_summary"], vcf_list) },
+        Channel.value("raw_calls")
     )
 
     // ──────────────────────────────────────────────────────────────────────
@@ -169,8 +169,8 @@ workflow ONTVAR {
 
     sv_calls_by_sample = all_caller_vcfs
         .map { meta, vcf -> 
-            def sample_name = meta.containsKey('sample') ? meta.sample : meta.id.replaceAll('_(sniffles|cutesv|severus)$', '')
-            tuple(sample_name, vcf)
+            def group_id = meta.sample ?: meta.id  // Use sample field first, fallback to id
+            tuple(group_id, vcf)
         }
         .groupTuple(by: 0)
 
@@ -181,8 +181,8 @@ workflow ONTVAR {
     jasminesv_sample_input = sv_calls_by_sample
         .filter { meta, vcf_list -> vcf_list.size() > 0 }
         .map { meta, vcf_list -> 
-            def sample_id = meta
-            tuple([id: "${sample_id}_jasmine", sample: sample_id], vcf_list, [], [])
+            def group_id = meta
+            tuple([id: group_id, sample: group_id, step: "consensus"], vcf_list, [], [])
         }
 
     // Prepare Jasmine input channels (per-sample)
@@ -215,22 +215,22 @@ workflow ONTVAR {
 
     sample_filtered = ch_jasmine_sample_vcfs | FILTER_CHR
 
-    sample_filtered | BCFTOOLS_SORT_SAMPLE
-    sample_sorted = BCFTOOLS_SORT_SAMPLE.out.vcf
+    sample_filtered | SORT_VCF
+    sample_sorted = SORT_VCF.out.vcf
 
     // ──────────────────────────────────────────────────────────────────────
     // Filter SVs supported by ≥2 callers
     // ──────────────────────────────────────────────────────────────────────
 
     bcftools_sample_input = sample_sorted
-        .map { meta, vcf ->
-            def v = vcf.toString()
-            def updated_meta = meta + [sample: meta.sample ?: meta.id?.replaceAll('_jasmine.*', '')]
-            def idx = file(v + '.csi')
-            if( !idx.exists() ) idx = file(v + '.tbi')
-            def idx_out = idx.exists() ? idx : []
-            tuple(updated_meta, file(v), idx_out)
-        }
+    .map { meta, vcf ->
+        def v = vcf.toString()
+        def updated_meta = [id: meta.sample ?: meta.id, sample: meta.sample ?: meta.id, step: "caller_support"]
+        def idx = file(v + '.csi')
+        if( !idx.exists() ) idx = file(v + '.tbi')
+        def idx_out = idx.exists() ? idx : []
+        tuple(updated_meta, file(v), idx_out)
+    }
     
     CALLER_SUPPORT_FILTER(
         bcftools_sample_input,
@@ -242,8 +242,8 @@ workflow ONTVAR {
     SUMMARIZE_CALLER_MERGED(
         CALLER_SUPPORT_FILTER.out.vcf
             .first()
-            .map { meta, vcf -> tuple([id: "02_caller_merged"], vcf) },
-        Channel.value("02_caller_merged")
+            .map { meta, vcf -> tuple([id: "consensus_summary"], vcf) },
+        Channel.value("consensus")
     )
 
     // ──────────────────────────────────────────────────────────────────────
@@ -271,12 +271,13 @@ workflow ONTVAR {
     )
 
     ch_per_sample_bcftools_input = SVDB_QUERY_SAMPLE.out.vcf
-        .map { meta, annotated_vcf ->
-            def idx = file(annotated_vcf.toString() + '.csi')
-            if( !idx.exists() ) idx = file(annotated_vcf.toString() + '.tbi')
-            def idx_out = idx.exists() ? idx : []
-            tuple(meta, file(annotated_vcf), idx_out)
-        }
+    .map { meta, annotated_vcf ->
+        def updated_meta = [id: meta.sample ?: meta.id, sample: meta.sample ?: meta.id, step: "af_filter"]
+        def idx = file(annotated_vcf.toString() + '.csi')
+        if( !idx.exists() ) idx = file(annotated_vcf.toString() + '.tbi')
+        def idx_out = idx.exists() ? idx : []
+        tuple(updated_meta, file(annotated_vcf), idx_out)
+    }
     
     ch_bcftools_regions = Channel.value([])
     ch_bcftools_targets = Channel.value([])
@@ -293,8 +294,8 @@ workflow ONTVAR {
     SUMMARIZE_CALLER_MERGED_FILTERED(
         AF_FILTER.out.vcf
             .first()
-            .map { meta, vcf -> tuple([id: "03_caller_merged_filtered"], vcf) },
-        Channel.value("03_caller_merged_filtered")
+            .map { meta, vcf -> tuple([id: "filtered_summary"], vcf) },
+        Channel.value("filtered")
     )
 
     // ──────────────────────────────────────────────────────────────────────
@@ -335,8 +336,9 @@ workflow ONTVAR {
     ANNOTSV_PER_SAMPLE(
         AF_FILTER.out.vcf
             .map { meta, vcf -> 
-            meta.id = meta.id + "_annotated"
-            tuple(meta, vcf, [], []) },
+                def updated_meta = [id: meta.sample ?: meta.id, sample: meta.sample ?: meta.id, step: "final_annotation"]
+                tuple(updated_meta, vcf, [], [])
+            },
         ch_annotsv_annotations,
         ch_candidate_genes,
         ch_false_positive_snv,
@@ -411,7 +413,7 @@ workflow ONTVAR {
     // Cohort summaries - one per cohort directory
     SUMMARIZE_COHORT(
         AF_FILTER_COHORT.out.vcf
-        .map { meta, vcf -> tuple(meta, vcf) },
+            .map { meta, vcf -> tuple([id: "cohort_summary"], vcf) },
         Channel.value("cohort")
     )
 
@@ -446,7 +448,7 @@ workflow ONTVAR {
     ch_versions = ch_versions.mix(SEVERUS_NO_CONTROL.out.versions)
     ch_versions = ch_versions.mix(JASMINESV_SAMPLE.out.versions)
     ch_versions = ch_versions.mix(JASMINESV_COHORT.out.versions)
-    ch_versions = ch_versions.mix(BCFTOOLS_SORT_SAMPLE.out.versions)
+    ch_versions = ch_versions.mix(SORT_VCF.out.versions)
     ch_versions = ch_versions.mix(SVDB_QUERY_SAMPLE.out.versions)
     ch_versions = ch_versions.mix(SVDB_QUERY_COHORT.out.versions)
     ch_versions = ch_versions.mix(CALLER_SUPPORT_FILTER.out.versions)

From 0107ed626c5418d53cf1fe2b82cfbc0d36f91a33 Mon Sep 17 00:00:00 2001
From: manascripts <manas.sehgal@dkfz.de>
Date: Fri, 26 Sep 2025 14:47:37 +0200
Subject: [PATCH 05/11] delete modules_params.config

---
 conf/modules_params.config | 124 -------------------------------------
 nextflow.config            |   1 -
 2 files changed, 125 deletions(-)
 delete mode 100644 conf/modules_params.config

diff --git a/conf/modules_params.config b/conf/modules_params.config
deleted file mode 100644
index f4f9c32..0000000
--- a/conf/modules_params.config
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Module-specific parameter configurations
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-params {
-
-    // SV callers
-    sniffles = [
-        tandem_repeats: "${projectDir}/assets/vntr/hg38.p14.trf.bed",
-        sniffles_args: "--mapq 20 \
-            --minsvlen 50 \
-            --minsupport 5 \
-            --cluster-merge-pos 100"
-    ]
-    cutesv = [
-        cutesv_args: "--genotype \
-            --min_support 5 \
-            --min_size 50 \
-            --min_mapq 20 \
-            --max_cluster_bias_INS 100 \
-            --diff_ratio_merging_INS 0.3 \
-            --max_cluster_bias_DEL 100 \
-            --diff_ratio_merging_DEL 0.3"
-    ]
-    severus_with_control = [
-        vntr_bed: "${projectDir}/assets/vntr/hg38.p14.trf.bed",
-        severus_with_control_args: "--min-sv-size 50 \
-            --min-mapq 20 \
-            --min-support 5"
-    ]
-    severus_no_control = [
-        vntr_bed: "${projectDir}/assets/vntr/hg38.p14.trf.bed",
-        severus_no_control_args: "--min-sv-size 50 \
-            --PON \"${projectDir}/assets/pon/PoN_1000G_hg38_extended.tsv.gz\" \
-            --min-mapq 20 \
-            --min-support 5"
-    ]
-    
-    // Jasmine sample and cohort consensus
-    jasminesv_sample = [
-        fasta: null,
-        fasta_fai: null,
-        chr_norm: null,
-        jasminesv_sample_args: """--min_overlap 0.7 \
-            --spec_len 50 \
-            --min_seq_id 0.7 \
-            --max_dist_linear 0.5 \
-            --max_dist 3000 \
-            --min_dist 100 \
-            --pre_normalize \
-            --normalize_type \
-            --non_mutual_distance \
-            --spec_reads 5"""
-    ]
-    jasminesv_cohort = [
-        fasta: null,
-        fasta_fai: null,
-        chr_norm: null,
-        jasminesv_cohort_args: """--min_overlap 0.7 \
-            --spec_len 50 \
-            --min_seq_id 0.7 \
-            --max_dist_linear 0.5 \
-            --max_dist 3000 \
-            --min_dist 100 \
-            --pre_normalize \
-            --normalize_type \
-            --non_mutual_distance \
-            --spec_reads 5"""
-    ]
-
-    // AF annotation with SVDB
-    svdb_query = [
-        vcf_dbs: [
-            "${projectDir}/assets/svdb_databases/gnomad.v4.1.sv.sites.rmMissing.vcf.gz",
-            "${projectDir}/assets/svdb_databases/UWONT_450_AF.vcf.gz"
-        ],
-        in_frq: ["AF", "PFneedLR"],
-        out_frq: [ "AFgnomAD", "AFneedLR" ],
-        in_occ: [ "AN", "AN" ],
-        out_occ: [ "ANgnomAD", "ANneedLR" ],
-        prefix: "cohort",
-        svdb_query_args: "--bnd_distance 2500 \
-            --overlap 0.8"
-    ]
-    // svdb_query_sample = [
-    //     // uses params.svdb_query by default
-    //     svdb_query_args: params.svdb_query.svdb_query_args
-    // ]
-
-    // bcftools filtering
-    bcftools_view_sample = [
-        regions: null,
-        targets: null,
-        samples: null,
-        bcftools_view_sample_args: """--include 'INFO/SUPP>=1'""",
-    ]
-
-    bcftools_view_cohort = [
-        regions: null,
-        targets: null,
-        samples: null,
-        bcftools_view_cohort_args: """--include '(INFO/AFgnomAD="." || INFO/AFgnomAD<0.1) && (INFO/AFneedLR="." || INFO/AFneedLR<0.006)'""",
-    ]
-    // bcftools_view_per_sample = [
-    //     regions: null,
-    //     targets: null,
-    //     samples: null,
-    //     bcftools_view_per_sample_args: params.bcftools_view_cohort?.bcftools_view_cohort_args ?: """--include '(INFO/AFgnomAD="." || INFO/AFgnomAD<0.1) && (INFO/AFneedLR="." || INFO/AFneedLR<0.006)'""",
-    // ]
-
-    // AnnotSV
-    annotsv = [
-        genome_build: "GRCh38",
-        annotations: null,
-        candidate_genes: null,
-        false_positive_snv: null,
-        gene_transcripts: null,
-        annotsv_args: """-vcf 1 \
-            -SVminSize 0"""
-    ]
-
-}
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 4ca716c..c981b2a 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -388,5 +388,4 @@ validation {
 }
 
 // Load modules.config for DSL2 module specific options
-// includeConfig 'conf/modules_params.config'
 includeConfig 'conf/modules.config'

From 217a971e1abad0aca77c790d281c3206d2502cef Mon Sep 17 00:00:00 2001
From: manascripts <manas.sehgal@dkfz.de>
Date: Fri, 26 Sep 2025 14:49:38 +0200
Subject: [PATCH 06/11] update configs and singluarity profile

---
 conf/base.config |  4 +--
 nextflow.config  | 66 ++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 6d96e3b..4c08a1f 100755
--- a/conf/base.config
+++ b/conf/base.config
@@ -13,10 +13,10 @@ process {
     // TODO nf-core: Check the defaults for all processes
     cpus   = { 1      * task.attempt }
     memory = { 6.GB   * task.attempt }
-    // time   = { 4.h    * task.attempt }
+    time   = { 4.h    * task.attempt }
 
     errorStrategy = { task.exitStatus in ((130..145) + 104 + 175) ? 'retry' : 'finish' }
-    maxRetries    = 1
+    maxRetries    = 2
     maxErrors     = '-1'
 
     // Process-specific resource requirements
diff --git a/nextflow.config b/nextflow.config
index c981b2a..9188040 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -183,21 +183,61 @@ profiles {
         shifter.enabled         = false
         charliecloud.enabled    = false
         apptainer.enabled       = false
-            process {
-                executor = 'lsf'
-                // scratch = '/omics/odcf/analysis/OE0415_projects/nb_lrs/LRS/2025-02-01_NB_LRS/05-analysis_tmp/nf-core-ontvar-test/run_amt/nextflow.config'
-                scratch = '/omics/odcf/analysis/OE0415_projects/nb_lrs/LRS/2025-02-01_NB_LRS/05-analysis_tmp/nf-core-ontvar-test/tmp'
-                memory = { 25.GB * task.attempt }
-                queue = { task.attempt <= 2 ? 'medium-debian' : 'long-debian' }
-                cpus = { Math.min(15 * task.attempt, 48) }
-                errorStrategy = 'retry'
-                maxRetries = 2
+        
+        process {
+            executor = 'lsf'
+            scratch = '/omics/odcf/analysis/OE0415_projects/nb_lrs/LRS/2025-02-01_NB_LRS/05-analysis_tmp/nf-core-ontvar-test/tmp'
+            
+            // Queue selection based on your cluster configuration
+            queue = { 
+                def mem = (task.memory ?: 8.GB).toGiga()
+                def hours = (task.time ?: 6.h).toHours()
+                
+                if (hours <= 10.0/60.0) { 
+                    'short'
+                } else if (hours <= 1.0) {  // 1 hour
+                    'medium'
+                } else if (hours <= 10.0) {  // 10 hours
+                    'long'
+                } else if (mem > 200) {  // More than 200GB memory
+                    'highmem'
+                } else {  // Unlimited time, up to 200GB memory
+                    'verylong'
+                }
+            }
+            
+            // Resource limits based on your cluster specs
+            memory = { 
+                def mem = (task.memory ?: 8.GB).toGiga()
+                if (mem > 200) {
+                    // highmem queue - up to 4TB available but no hard limit
+                    return Math.min(mem, 4000) + '.GB'
+                } else {
+                    // Other queues - 200GB limit
+                    return Math.min(mem, 200) + '.GB'
+                }
             }
-            executor {
-                name = 'lsf'
-                perTaskReserve = false
-                perJobMemLimit = true
+            cpus = { Math.min(task.cpus ?: 2, 10) }  // Max 10 cores per your cluster config
+            time = { 
+                def hours = (task.ext.time ?: 10.h).toHours()  // Use task.ext.time or a fixed default
+                if (hours <= 10.0/60.0) {
+                    '10.m'  // short queue
+                } else if (hours <= 1.0) {
+                    '1.h'   // medium queue
+                } else if (hours <= 10.0) {
+                    '10.h'  // long queue
+                } else {
+                    '240.h' // verylong - set reasonable max
+                }
             }
+        }
+        
+        executor {
+            name = 'lsf'
+            perTaskReserve = false
+            perJobMemLimit = true
+            queueSize = 100
+        }
     }
     podman {
         podman.enabled          = true

From 643e3eef18b8e6400e1dc64d07eb690bcb506532 Mon Sep 17 00:00:00 2001
From: manascripts <manas.sehgal@dkfz.de>
Date: Mon, 29 Sep 2025 10:57:02 +0200
Subject: [PATCH 07/11] update summary logic

---
 conf/modules.config                       |  74 ++--
 modules/local/summarize_sv_counts/main.nf | 490 +++++++++-------------
 nextflow.config                           |   5 +-
 workflows/ontvar.nf                       |  32 +-
 4 files changed, 256 insertions(+), 345 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index dc964dd..61d9a9f 100755
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -32,8 +32,8 @@ process {
 
     // Raw SV caller outputs (don't publish - intermediate)
     withName: 'SNIFFLES' {
-        cpus = 3
-        memory = 3.GB
+        cpus = 12
+        memory = { 24.GB * task.attempt }
         ext.prefix = { "${meta.sample ?: meta.id}_sniffles" }
         ext.args = {
             def base_args = "--mapq ${params.mapq} --minsvlen ${params.minsvlen} --minsupport ${params.minsupport} --cluster-merge-pos ${params.cluster_merge_pos}"
@@ -47,8 +47,8 @@ process {
     }
 
     withName: 'CUTESV' {
-        cpus = 4
-        memory = 12.GB
+        cpus = 8
+        memory = { 16.GB * task.attempt }
         ext.prefix = { "${meta.sample ?: meta.id}_cutesv" }
         ext.args = {
             def base_args = "--min_support ${params.cutesv_min_support} --min_size ${params.min_size} --min_mapq ${params.cutesv_min_mapq}"
@@ -64,8 +64,8 @@ process {
     }
 
     withName: 'SEVERUS_WITH_CONTROL' {
-        cpus = 4
-        memory = 5.GB
+        cpus = 10
+        memory = { 26.GB * task.attempt }
         ext.prefix = { "${meta.sample ?: meta.id}_tn_severus" }
         ext.args = {
             def base_args = "--min-sv-size ${params.min_sv_size} --min-mapq ${params.severus_min_mapq} --min-support ${params.severus_min_support}"
@@ -78,8 +78,8 @@ process {
     }
 
     withName: 'SEVERUS_NO_CONTROL' {
-        cpus = 4
-        memory = 5.GB
+        cpus = 10
+        memory = { 26.GB * task.attempt }
         ext.prefix = { "${meta.sample ?: meta.id}_to_severus" }
         ext.args = {
             def base_args = "--min-sv-size ${params.min_sv_size} --PON \"${params.PON}\" --min-mapq ${params.severus_min_mapq} --min-support ${params.severus_min_support}"
@@ -94,7 +94,7 @@ process {
     // Header-renamed VCFs (these are the actual processed caller outputs)
     withName: 'RENAME_VCF_HEADERS_SNIFFLES' {
         cpus = 1
-        memory = 1.GB
+        memory = { 1.GB * task.attempt }
         ext.args = ""
         publishDir = [
             path: { "${params.outdir}/case/01_raw_calls/sniffles" },
@@ -105,7 +105,7 @@ process {
 
     withName: 'RENAME_VCF_HEADERS_CUTESV' {
         cpus = 1
-        memory = 1.GB
+        memory = { 1.GB * task.attempt }
         ext.args = ""
         publishDir = [
             path: { "${params.outdir}/case/01_raw_calls/cutesv" },
@@ -116,7 +116,7 @@ process {
 
     withName: 'RENAME_VCF_HEADERS_SEVERUS' {
         cpus = 1
-        memory = 1.GB
+        memory = { 1.GB * task.attempt }
         ext.args = ""
         publishDir = [
             enabled: false  // Don't publish severus outputs
@@ -126,7 +126,7 @@ process {
     // RENAME_VCF - This is the final Severus output step
     withName: 'RENAME_VCF' {
         cpus = 1
-        memory = 1.GB
+        memory = { 1.GB * task.attempt }
         ext.args = ""
         ext.when = { task.ext.meta = meta; true }  // Pass meta to task.ext for use in publishDir
         publishDir = [
@@ -152,8 +152,8 @@ process {
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
     withName: 'JASMINESV_SAMPLE' {
-        cpus = 3
-        memory = 3.GB
+        cpus = 12
+        memory = { 14.GB * task.attempt }
         ext.args = {
             def base_args = "--min_overlap ${params.min_overlap} --spec_len ${params.spec_len} --min_seq_id ${params.min_seq_id}"
             base_args += " --max_dist_linear ${params.max_dist_linear} --max_dist ${params.max_dist} --min_dist ${params.min_dist}"
@@ -174,6 +174,8 @@ process {
 
     // Sample consensus filtering and processing
     withName: 'JASMINE_HEADER_FIX' {
+        cpus = 1
+        memory = { 3.GB * task.attempt }
         publishDir = [
             enabled: false  // Intermediate step
         ]
@@ -188,12 +190,16 @@ process {
     }
 
     withName: 'FILTER_CHR' {
+        cpus = 1
+        memory = { 3.GB * task.attempt }
         publishDir = [
             enabled: false  // Intermediate step
         ]
     }
 
     withName: 'SORT_VCF' {
+        cpus = 1
+        memory = { 3.GB * task.attempt }
         publishDir = [
             enabled: false  // Intermediate step
         ]
@@ -204,8 +210,8 @@ process {
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
     withName: 'CALLER_SUPPORT_FILTER' {
-        cpus = 2
-        memory = 2.GB
+        cpus = 4
+        memory = { 3.GB * task.attempt }
         ext.args = {
             // Build dynamic expression using min_caller_support parameter
             def support_expr = "INFO/SUPP>=${params.min_caller_support}"
@@ -224,8 +230,8 @@ process {
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
     withName: 'SVDB_QUERY_SAMPLE' {
-        cpus = 4
-        memory = 3.GB
+        cpus = 12
+        memory = { 16.GB * task.attempt }
         ext.args = {
             def base_args = "--bnd_distance ${params.bnd_distance} --overlap ${params.overlap}"
             def user_args = params.svdb_query_args ? params.svdb_query_args : ""
@@ -238,8 +244,8 @@ process {
     }
 
     withName: 'ANNOTSV_PER_SAMPLE_RAW' {
-        cpus = 6
-        memory = 8.GB
+        cpus = 3
+        memory = { 8.GB * task.attempt }
         ext.args = {
             def base_args = "-genomeBuild ${params.genome_build} -vcf ${params.output_vcf} -SVminSize ${params.min_sv_size}"
             def user_args = params.annotsv_args ? params.annotsv_args : ""
@@ -263,8 +269,8 @@ process {
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
     withName: 'AF_FILTER' {
-        cpus = 2
-        memory = 2.GB
+        cpus = 4
+        memory = { 3.GB * task.attempt }
         ext.args = {
             // Build dynamic AF filter expression using threshold parameters
             def af_expr = "(INFO/AFgnomAD=\".\" || INFO/AFgnomAD<${params.max_gnomad_af}) && (INFO/AFneedLR=\".\" || INFO/AFneedLR<${params.max_needlr_af})"
@@ -279,8 +285,8 @@ process {
     }
 
     withName: 'ANNOTSV_PER_SAMPLE' {
-        cpus = 6
-        memory = 8.GB
+        cpus = 3
+        memory = { 8.GB * task.attempt }
         ext.args = {
             def base_args = "-genomeBuild ${params.genome_build} -vcf ${params.output_vcf} -SVminSize ${params.min_sv_size}"
             def user_args = params.annotsv_args ? params.annotsv_args : ""
@@ -304,8 +310,8 @@ process {
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
     withName: 'JASMINESV_COHORT' {
-        cpus = 3
-        memory = 3.GB
+        cpus = 12
+        memory = { 14.GB * task.attempt }
         ext.args = {
             def base_args = "--min_overlap ${params.min_overlap} --spec_len ${params.spec_len} --min_seq_id ${params.min_seq_id}"
             base_args += " --max_dist_linear ${params.max_dist_linear} --max_dist ${params.max_dist} --min_dist ${params.min_dist}"
@@ -325,8 +331,8 @@ process {
     }
     
     withName: 'SVDB_QUERY_COHORT' {
-        cpus = 4
-        memory = 3.GB
+        cpus = 12
+        memory = { 16.GB * task.attempt }
         ext.args = {
             def base_args = "--bnd_distance ${params.bnd_distance} --overlap ${params.overlap}"
             def user_args = params.svdb_query_args ? params.svdb_query_args : ""
@@ -376,7 +382,7 @@ process {
     
     withName: 'ANNOTSV_INSTALLANNOTATIONS' {
         cpus = 1
-        memory = 4.GB
+        memory = { 4.GB * task.attempt }
         time = '1h'
         publishDir = [
             path: { "${params.outdir ?: './results'}" },
@@ -394,7 +400,7 @@ process {
     // Raw calls summary (in case/01-raw-calls/)
     withName: 'SUMMARIZE_CALLERS' {
         cpus = 1
-        memory = 1.GB
+        memory = { 3.GB * task.attempt }
         publishDir = [
             path: { "${params.outdir}/case/01_raw_calls" },
             mode: params.publish_dir_mode,
@@ -405,7 +411,7 @@ process {
     // Caller merged summary (in case/02-caller-merged/)
     withName: 'SUMMARIZE_CALLER_MERGED' {
         cpus = 1
-        memory = 1.GB
+        memory = { 3.GB * task.attempt }
         publishDir = [
             path: { "${params.outdir}/case/02_caller_merged" },
             mode: params.publish_dir_mode,
@@ -416,7 +422,7 @@ process {
     // Filtered merged summary (in case/04-caller-merged-filtered/)
     withName: 'SUMMARIZE_CALLER_MERGED_FILTERED' {
         cpus = 1
-        memory = 1.GB
+        memory = { 3.GB * task.attempt }
         publishDir = [
             path: { "${params.outdir}/case/03_caller_merged_filtered" },
             mode: params.publish_dir_mode,
@@ -427,7 +433,7 @@ process {
     // Cohort summary (in cohort/)
     withName: 'SUMMARIZE_COHORT' {
         cpus = 1
-        memory = 1.GB
+        memory = { 3.GB * task.attempt }
         publishDir = [
             path: { "${params.outdir}/cohort" },
             mode: params.publish_dir_mode,
@@ -438,7 +444,7 @@ process {
     // Utility modules that don't need to be published
     withName: 'UNTAR.*' {
         cpus = 1
-        memory = 1.GB
+        memory = { 1.GB * task.attempt }
         ext.args = ""
         publishDir = [
             enabled: false  // Don't publish intermediate untar files
diff --git a/modules/local/summarize_sv_counts/main.nf b/modules/local/summarize_sv_counts/main.nf
index 3385718..0d5fb80 100755
--- a/modules/local/summarize_sv_counts/main.nf
+++ b/modules/local/summarize_sv_counts/main.nf
@@ -16,334 +16,230 @@ process SUMMARIZE_SV_COUNTS {
     path "versions.yml", emit: versions
 
     script:
-    // Smart detection of input type
-    def is_list = vcf_input instanceof List || (vcf_input instanceof String && vcf_input.contains(' '))
-    def vcf_files = is_list ? (vcf_input instanceof List ? vcf_input.join(' ') : vcf_input) : vcf_input.toString()
+    def is_list = vcf_input instanceof List
+    def vcf_files = is_list ? vcf_input.join(' ') : vcf_input.toString()
     """
     python3 << 'EOF'
 import sys, json, statistics, subprocess, os
 from collections import defaultdict, OrderedDict
 
 def analyze_vcf(vcf_file):
-    if not os.path.exists(vcf_file) or os.path.getsize(vcf_file) == 0:
-        print(f"Warning: VCF file {vcf_file} is empty or does not exist")
-        return {"total_variants": 0, "sv_types": {}}
-        
-    # Check VCF header for available fields
-    header_cmd = f"bcftools view -h {vcf_file}"
-    try:
-        header_result = subprocess.run(header_cmd, shell=True, capture_output=True, text=True, timeout=30)
-        if header_result.returncode != 0:
-            print(f"Warning: Could not read header from {vcf_file}")
-            return {"total_variants": 0, "sv_types": {}}
-    except Exception as e:
-        print(f"Warning: Error reading {vcf_file}: {e}")
-        return {"total_variants": 0, "sv_types": {}}
-    
-    # Determine available INFO fields
-    has_chr2 = '##INFO=<ID=CHR2' in header_result.stdout
-    has_end = '##INFO=<ID=END' in header_result.stdout
-    has_svlen = '##INFO=<ID=SVLEN' in header_result.stdout
-    has_mateid = '##INFO=<ID=MATEID' in header_result.stdout
-    has_event = '##INFO=<ID=EVENT' in header_result.stdout
-    
-    print(f"Analyzing {os.path.basename(vcf_file)}: CHR2={has_chr2}, END={has_end}, SVLEN={has_svlen}, MATEID={has_mateid}, EVENT={has_event}")
-    
-    # Build query format based on available fields
-    query_fields = ['%CHROM', '%POS', '%ID', '%INFO/SVTYPE']
-    if has_chr2:
-        query_fields.append('%INFO/CHR2')
-    if has_end:
-        query_fields.append('%INFO/END')
-    if has_svlen:
-        query_fields.append('%INFO/SVLEN')
-    if has_mateid:
-        query_fields.append('%INFO/MATEID')
-    if has_event:
-        query_fields.append('%INFO/EVENT')
+    result = {
+        "total_variants": 0,
+        "sv_types": defaultdict(lambda: {"count": 0, "svlen_data": []})
+    }
     
-    query_format = '\\t'.join(query_fields) + '\\n'
+    if not vcf_file or not os.path.exists(vcf_file):
+        return result
     
-    # Query VCF data
-    query_cmd = f"bcftools query -f '{query_format}' {vcf_file}"
     try:
-        query_result = subprocess.run(query_cmd, shell=True, capture_output=True, text=True, timeout=60)
-        if query_result.returncode != 0:
-            print(f"Warning: Could not query {vcf_file}")
-            return {"total_variants": 0, "sv_types": {}}
-    except Exception as e:
-        print(f"Warning: Error querying {vcf_file}: {e}")
-        return {"total_variants": 0, "sv_types": {}}
-    
-    sv_counts = defaultdict(int)
-    svlen_dict = defaultdict(list)
-    processed_events = set()  # Track processed breakend pairs
-    bnd_pairs = {}           # Track BND mate relationships
-    processed_bnd_ids = set() # Track processed BND IDs
-    
-    total_lines = 0
-    deduplicated_count = 0
-    
-    for line in query_result.stdout.strip().split('\\n'):
-        if not line:
-            continue
-        total_lines += 1
-        fields = line.split('\\t')
-        if len(fields) < 4:
-            continue
-            
-        chrom = fields[0]
-        pos = int(fields[1]) if fields[1].isdigit() else 0
-        variant_id = fields[2]
-        svtype = fields[3]
-        
-        if not svtype or svtype == '.':
-            continue
-        
-        # Parse additional fields based on what's available
-        field_idx = 4
-        chr2 = fields[field_idx] if has_chr2 and field_idx < len(fields) else "."
-        field_idx += 1 if has_chr2 else 0
-        
-        end = fields[field_idx] if has_end and field_idx < len(fields) else "."
-        field_idx += 1 if has_end else 0
-        
-        svlen = fields[field_idx] if has_svlen and field_idx < len(fields) else None
-        field_idx += 1 if has_svlen else 0
-        
-        mateid = fields[field_idx] if has_mateid and field_idx < len(fields) else "."
-        field_idx += 1 if has_mateid else 0
-        
-        event_id = fields[field_idx] if has_event and field_idx < len(fields) else "."
-        
-        # Enhanced breakend/translocation handling
-        if svtype in {"BND", "TRA"}:
-            should_count = True
-            
-            # Strategy 1: Use EVENT ID if available (JASMINE often provides this)
-            if event_id != "." and event_id:
-                if event_id in processed_events:
-                    should_count = False
-                    deduplicated_count += 1
-                else:
-                    processed_events.add(event_id)
-                    
-            # Strategy 2: Use MATEID for BND pairs
-            elif svtype == "BND" and mateid != "." and mateid:
-                if variant_id in processed_bnd_ids or mateid in processed_bnd_ids:
-                    should_count = False
-                    deduplicated_count += 1
-                else:
-                    processed_bnd_ids.add(variant_id)
-                    processed_bnd_ids.add(mateid)
-                    svtype = "TRA"  # Count BND pairs as TRA
-                    
-            # Strategy 3: Use position-based deduplication for TRA
-            elif svtype == "TRA" and chr2 != "." and end != ".":
-                try:
-                    end_pos = int(end)
-                    # Create a canonical key (always smaller chromosome/position first)
-                    if chrom < chr2 or (chrom == chr2 and pos < end_pos):
-                        key = (chrom, pos, chr2, end_pos)
-                    else:
-                        key = (chr2, end_pos, chrom, pos)
-                    
-                    if key in processed_events:
-                        should_count = False
-                        deduplicated_count += 1
-                    else:
-                        processed_events.add(key)
-                except (ValueError, TypeError):
-                    # Fallback: count as single event
-                    pass
-                    
-            # Strategy 4: For other cases, use ID-based deduplication
-            else:
-                # Check if this looks like a mate pair ID (ends with _1, _2, etc.)
-                if variant_id.endswith(('_1', '_2', '.1', '.2')):
-                    base_id = variant_id[:-2]
-                    if base_id in processed_events:
-                        should_count = False
-                        deduplicated_count += 1
-                    else:
-                        processed_events.add(base_id)
-            
-            if should_count:
-                sv_counts[svtype] += 1
+        with open(vcf_file, 'r') as f:
+            for line in f:
+                if line.startswith('#'):
+                    continue
                 
-        else:
-            # Standard SV types (DEL, DUP, INS, INV)
-            sv_counts[svtype] += 1
-        
-        # Collect SVLEN data (with filtering for unrealistic values)
-        if svlen not in (None, ".", ""):
-            try:
-                svlen_val = abs(int(float(svlen)))
-                # Filter out unrealistic SVLEN values for BND/TRA
-                if svtype in {"BND", "TRA"}:
-                    # For translocations, very large SVLEN values are often artifacts
-                    # Only include if SVLEN seems reasonable (< 1MB for most cases)
-                    if svlen_val < 1000000:  # 1MB cutoff
-                        svlen_dict[svtype].append(svlen_val)
-                else:
-                    # For other SV types, include all positive SVLEN values
-                    if svlen_val > 0:
-                        svlen_dict[svtype].append(svlen_val)
-            except (ValueError, TypeError):
-                pass
-    
-    # Build result structure
-    sv_types = {}
-    for svtype in sorted(sv_counts.keys()):  # Sort for consistent ordering
-        entry = {"count": sv_counts[svtype]}
-        svlens = svlen_dict.get(svtype, [])
-        if svlens:
-            entry["svlen_min"] = min(svlens)
-            entry["svlen_max"] = max(svlens)
-            entry["svlen_mean"] = round(statistics.mean(svlens), 2)
-            entry["svlen_median"] = statistics.median(svlens)
-            entry["svlen_stdev"] = round(statistics.stdev(svlens) if len(svlens) > 1 else 0, 2)
-        else:
-            entry.update({
-                "svlen_min": None, "svlen_max": None, "svlen_mean": None,
-                "svlen_median": None, "svlen_stdev": None
-            })
-        sv_types[svtype] = entry
-    
-    total_variants = sum(sv_counts.values())
-    print(f"Processed {os.path.basename(vcf_file)}: {total_lines} raw variants -> {total_variants} counted variants")
-    if deduplicated_count > 0:
-        print(f"  - Deduplicated {deduplicated_count} breakend/translocation events")
+                fields = line.strip().split('\\t')
+                if len(fields) < 8:
+                    continue
+                
+                result["total_variants"] += 1
+                info = fields[7]
+                
+                svtype = "UNK"
+                for item in info.split(';'):
+                    if item.startswith('SVTYPE='):
+                        svtype = item.split('=')[1]
+                        break
+                
+                result["sv_types"][svtype]["count"] += 1
+                
+                for item in info.split(';'):
+                    if item.startswith('SVLEN='):
+                        try:
+                            svlen = abs(int(item.split('=')[1]))
+                            result["sv_types"][svtype]["svlen_data"].append(svlen)
+                        except:
+                            pass
+                        break
     
-    return {
-        "total_variants": total_variants,
-        "sv_types": sv_types
-    }
+    except Exception as e:
+        print(f"Error processing {vcf_file}: {e}")
+    
+    # Compute statistics
+    for svtype, data in result["sv_types"].items():
+        if data["svlen_data"]:
+            lengths = data["svlen_data"]
+            data["svlen_min"] = min(lengths)
+            data["svlen_max"] = max(lengths)
+            data["svlen_mean"] = round(statistics.mean(lengths), 2)
+            data["svlen_median"] = round(statistics.median(lengths), 1)
+            data["svlen_stdev"] = round(statistics.stdev(lengths) if len(lengths) > 1 else 0, 2)
+        del data["svlen_data"]
+    
+    return result
 
-# Smart processing based on input type
+# Get inputs
 vcf_files_str = "${vcf_files}"
 stage = "${stage_name}"
-
-# Determine if this is multi-caller or single VCF analysis
 vcf_list = [f.strip() for f in vcf_files_str.split() if f.strip()]
-is_multi_caller = len(vcf_list) > 1
 
-print(f"Processing stage: {stage}")
-print(f"Analysis mode: {'multi_caller' if is_multi_caller else 'single_vcf'}")
-print(f"VCF files to process: {len(vcf_list)}")
+# Detect if we should group by sample based on filenames
+sample_data = {}
+for vcf_file in vcf_list:
+    # Extract sample ID from filename
+    basename = os.path.basename(vcf_file)
+    if '_' in basename:
+        sample_id = basename.split('_')[0]  # e.g., SAMPLE_caller.vcf
+    else:
+        sample_id = basename.split('.')[0]   # fallback to full basename without extension
+    
+    if sample_id not in sample_data:
+        sample_data[sample_id] = []
+    sample_data[sample_id].append(vcf_file)
+
+# Only use sample grouping if we have multiple samples or multiple VCFs per sample
+use_sample_grouping = len(sample_data) > 1 or any(len(vcfs) > 1 for vcfs in sample_data.values())
 
-if is_multi_caller:
-    # Multi-caller analysis (for raw calls)
+if use_sample_grouping:
+    # Multi-sample nested analysis
     result = OrderedDict([
         ("stage", stage),
-        ("analysis_type", "multi_caller"),
-        ("callers", {}),
-        ("combined_stats", {
-            "total_variants": 0,
-            "sv_types": defaultdict(lambda: {"count": 0, "svlen_data": []})
-        })
+        ("analysis_type", "multi_sample"),
+        ("samples", {})
     ])
     
-    # Analyze each VCF file (caller)
-    for vcf_file in vcf_list:
-        if not vcf_file or not os.path.exists(vcf_file):
-            print(f"Warning: VCF file {vcf_file} not found")
-            continue
+    for sample_id, sample_vcfs in sample_data.items():
+        print(f"Processing sample: {sample_id}")
+        sample_vcfs_list = sample_vcfs if isinstance(sample_vcfs, list) else [sample_vcfs]
         
-        # Extract caller name from filename
-        basename = os.path.basename(vcf_file)
-        if 'sniffles' in basename.lower():
-            caller = 'sniffles'
-        elif 'cutesv' in basename.lower():
-            caller = 'cutesv'
-        elif 'severus' in basename.lower():
-            caller = 'severus'
-        elif 'jasmine' in basename.lower() or 'consensus' in basename.lower():
-            caller = 'consensus'
+        if len(sample_vcfs_list) > 1:
+            # Multi-caller for this sample
+            sample_result = OrderedDict([
+                ("analysis_type", "multi_caller"),
+                ("callers", {}),
+                ("combined_stats", {
+                    "total_variants": 0,
+                    "sv_types": defaultdict(lambda: {"count": 0, "svlen_data": []})
+                })
+            ])
+            
+            caller_map = {}
+            for vcf_file in sample_vcfs_list:
+                vcf_name = os.path.basename(str(vcf_file)).lower()
+                if 'sniffles' in vcf_name:
+                    caller_map[vcf_file] = 'sniffles'
+                elif 'cutesv' in vcf_name:
+                    caller_map[vcf_file] = 'cutesv'
+                elif 'severus' in vcf_name:
+                    caller_map[vcf_file] = 'severus'
+                else:
+                    caller_map[vcf_file] = 'unknown'
+            
+            for vcf_file, caller in caller_map.items():
+                analyzed_data = analyze_vcf(str(vcf_file))
+                sample_result["callers"][caller] = analyzed_data
+                
+                sample_result["combined_stats"]["total_variants"] += analyzed_data["total_variants"]
+                for svtype, data in analyzed_data["sv_types"].items():
+                    sample_result["combined_stats"]["sv_types"][svtype]["count"] += data["count"]
+                    if "svlen_data" in data:
+                        sample_result["combined_stats"]["sv_types"][svtype]["svlen_data"].extend(data["svlen_data"])
+            
+            # Compute combined stats
+            for svtype, data in sample_result["combined_stats"]["sv_types"].items():
+                if data["svlen_data"]:
+                    lengths = data["svlen_data"]
+                    data["svlen_min"] = min(lengths)
+                    data["svlen_max"] = max(lengths)
+                    data["svlen_mean"] = round(statistics.mean(lengths), 2)
+                    data["svlen_median"] = round(statistics.median(lengths), 1)
+                    data["svlen_stdev"] = round(statistics.stdev(lengths) if len(lengths) > 1 else 0, 2)
+                    del data["svlen_data"]
+            
         else:
-            # Fallback: use filename without extension
-            caller = basename.split('.')[0].split('_')[-1]
-        
-        print(f"\\nProcessing {caller}: {basename}")
-        
-        # Analyze this VCF with enhanced deduplication
-        stats = analyze_vcf(vcf_file)
-        result["callers"][caller] = stats
+            # Single VCF for this sample
+            analyzed_data = analyze_vcf(str(sample_vcfs_list[0]))
+            sample_result = OrderedDict([
+                ("analysis_type", "single_vcf"),
+                ("total_variants", analyzed_data["total_variants"]),
+                ("sv_types", analyzed_data["sv_types"])
+            ])
         
-        # Add to combined stats
-        result["combined_stats"]["total_variants"] += stats["total_variants"]
-        for svtype, data in stats["sv_types"].items():
-            result["combined_stats"]["sv_types"][svtype]["count"] += data["count"]
-            if data.get("svlen_min") is not None:
-                # Collect all SVLEN statistics for later aggregation
-                svlen_values = []
-                if data.get("svlen_min") is not None:
-                    svlen_values.append(data["svlen_min"])
-                if data.get("svlen_max") is not None:
-                    svlen_values.append(data["svlen_max"])
-                if data.get("svlen_mean") is not None:
-                    svlen_values.append(data["svlen_mean"])
-                if data.get("svlen_median") is not None:
-                    svlen_values.append(data["svlen_median"])
-                result["combined_stats"]["sv_types"][svtype]["svlen_data"].extend(svlen_values)
-    
-    # Finalize combined stats
-    final_combined = {"total_variants": result["combined_stats"]["total_variants"], "sv_types": {}}
-    for svtype in sorted(result["combined_stats"]["sv_types"].keys()):
-        data = result["combined_stats"]["sv_types"][svtype]
-        entry = {"count": data["count"]}
-        svlen_data = [x for x in data["svlen_data"] if x is not None and x > 0]
-        if svlen_data:
-            entry.update({
-                "svlen_min": min(svlen_data),
-                "svlen_max": max(svlen_data),
-                "svlen_mean": round(statistics.mean(svlen_data), 2),
-                "svlen_median": statistics.median(svlen_data),
-                "svlen_stdev": round(statistics.stdev(svlen_data) if len(svlen_data) > 1 else 0, 2)
-            })
-        else:
-            entry.update({
-                "svlen_min": None, "svlen_max": None, "svlen_mean": None,
-                "svlen_median": None, "svlen_stdev": None
-            })
-        final_combined["sv_types"][svtype] = entry
-    
-    result["combined_stats"] = final_combined
+        result["samples"][sample_id] = sample_result
 
 else:
-    # Single VCF analysis (for consensus stages) - CONSISTENT ORDERING
-    vcf_file = vcf_list[0] if vcf_list else ""
-    print(f"\\nProcessing single VCF: {os.path.basename(vcf_file) if vcf_file else 'None'}")
-    
-    analyzed_data = analyze_vcf(vcf_file)
-    
-    # Build result with consistent field ordering using OrderedDict
-    result = OrderedDict([
-        ("stage", stage),
-        ("analysis_type", "single_vcf"),
-        ("total_variants", analyzed_data["total_variants"]),
-        ("sv_types", analyzed_data["sv_types"])
-    ])
+    # Regular analysis (cohort-level or legacy)
+    if len(vcf_list) > 1:
+        # Multi-caller analysis
+        result = OrderedDict([
+            ("stage", stage),
+            ("analysis_type", "multi_caller"),
+            ("callers", {}),
+            ("combined_stats", {
+                "total_variants": 0,
+                "sv_types": defaultdict(lambda: {"count": 0, "svlen_data": []})
+            })
+        ])
+        
+        caller_map = {}
+        for vcf_file in vcf_list:
+            vcf_name = os.path.basename(vcf_file).lower()
+            if 'sniffles' in vcf_name:
+                caller_map[vcf_file] = 'sniffles'
+            elif 'cutesv' in vcf_name:
+                caller_map[vcf_file] = 'cutesv'
+            elif 'severus' in vcf_name:
+                caller_map[vcf_file] = 'severus'
+            else:
+                caller_map[vcf_file] = 'unknown'
+        
+        for vcf_file, caller in caller_map.items():
+            analyzed_data = analyze_vcf(vcf_file)
+            result["callers"][caller] = analyzed_data
+            
+            result["combined_stats"]["total_variants"] += analyzed_data["total_variants"]
+            for svtype, data in analyzed_data["sv_types"].items():
+                result["combined_stats"]["sv_types"][svtype]["count"] += data["count"]
+                if "svlen_data" in data:
+                    result["combined_stats"]["sv_types"][svtype]["svlen_data"].extend(data["svlen_data"])
+        
+        # Compute combined statistics
+        for svtype, data in result["combined_stats"]["sv_types"].items():
+            if data["svlen_data"]:
+                lengths = data["svlen_data"]
+                data["svlen_min"] = min(lengths)
+                data["svlen_max"] = max(lengths)
+                data["svlen_mean"] = round(statistics.mean(lengths), 2)
+                data["svlen_median"] = round(statistics.median(lengths), 1)
+                data["svlen_stdev"] = round(statistics.stdev(lengths) if len(lengths) > 1 else 0, 2)
+                del data["svlen_data"]
+    
+    else:
+        # Single VCF analysis
+        vcf_file = vcf_list[0] if vcf_list else ""
+        analyzed_data = analyze_vcf(vcf_file)
+        
+        result = OrderedDict([
+            ("stage", stage),
+            ("analysis_type", "single_vcf"),
+            ("total_variants", analyzed_data["total_variants"]),
+            ("sv_types", analyzed_data["sv_types"])
+        ])
 
-# Write output with consistent formatting
+# Write output
 with open("${stage_name}_summary.json", "w") as f:
     json.dump(result, f, indent=2)
-    
-print(f"\\n=== SUMMARY ===")
-print(f"Generated summary for stage: {stage}")
-print(f"Analysis type: {'multi_caller' if is_multi_caller else 'single_vcf'}")
-print(f"Total variants: {result.get('total_variants', 0)}")
 
-if is_multi_caller:
-    print("Per-caller breakdown:")
-    for caller, stats in result.get("callers", {}).items():
-        print(f"  {caller}: {stats.get('total_variants', 0)} variants")
+print(f"Generated summary for stage: {stage}")
+if 'samples' in result:
+    print(f"Samples processed: {len(result['samples'])}")
+    for sample_id, sample_data in result['samples'].items():
+        if 'total_variants' in sample_data:
+            print(f"  {sample_id}: {sample_data['total_variants']} variants")
+        elif 'combined_stats' in sample_data:
+            print(f"  {sample_id}: {sample_data['combined_stats']['total_variants']} variants")
 else:
-    print("SV type breakdown:")
-    for svtype, data in result.get("sv_types", {}).items():
-        print(f"  {svtype}: {data.get('count', 0)} variants")
-
-print("===============")
+    print(f"Total variants: {result.get('total_variants', 0)}")
 EOF
 
     cat <<-END_VERSIONS > versions.yml
diff --git a/nextflow.config b/nextflow.config
index 9188040..deea6b9 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -80,7 +80,7 @@ params {
     svdb_query_args           = ""
 
     // Filtering parameters
-    min_caller_support           = 1
+    min_caller_support           = 2
     max_gnomad_af                = 0.1
     max_needlr_af                = 0.006
     caller_support_filter_args   = ""
@@ -189,8 +189,7 @@ profiles {
             scratch = '/omics/odcf/analysis/OE0415_projects/nb_lrs/LRS/2025-02-01_NB_LRS/05-analysis_tmp/nf-core-ontvar-test/tmp'
             
             // Queue selection based on your cluster configuration
-            queue = { 
-                def mem = (task.memory ?: 8.GB).toGiga()
+            queue = {
                 def hours = (task.time ?: 6.h).toHours()
                 
                 if (hours <= 10.0/60.0) { 
diff --git a/workflows/ontvar.nf b/workflows/ontvar.nf
index 2a03dff..88c066b 100755
--- a/workflows/ontvar.nf
+++ b/workflows/ontvar.nf
@@ -155,11 +155,14 @@ workflow ONTVAR {
     all_caller_vcfs = sniffles_renamed_vcfs.mix(cutesv_renamed_vcfs, severus_renamed_vcfs)
         .map { meta, vcf -> tuple(meta, vcf) }
 
-    // Raw caller summaries (multi-caller summary for 01-raw-calls directory)
+    // Raw caller summaries
+    all_caller_vcfs_for_summary = all_caller_vcfs
+        .map { meta, vcf -> vcf }
+        .collect()
+        .map { vcf_list -> tuple([id: "raw_caller_summary"], vcf_list) }
+
     SUMMARIZE_CALLERS(
-        all_caller_vcfs
-            .collect { meta, vcf -> vcf }
-            .map { vcf_list -> tuple([id: "raw_caller_summary"], vcf_list) },
+        all_caller_vcfs_for_summary,
         Channel.value("raw_calls")
     )
 
@@ -239,10 +242,14 @@ workflow ONTVAR {
         Channel.value([])  // samples
     )
 
+    // Consensus summary - simple approach
+    consensus_summary_input = CALLER_SUPPORT_FILTER.out.vcf
+        .map { meta, vcf -> vcf }
+        .collect()
+        .map { vcf_list -> tuple([id: "consensus_summary"], vcf_list) }
+
     SUMMARIZE_CALLER_MERGED(
-        CALLER_SUPPORT_FILTER.out.vcf
-            .first()
-            .map { meta, vcf -> tuple([id: "consensus_summary"], vcf) },
+        consensus_summary_input,
         Channel.value("consensus")
     )
 
@@ -290,11 +297,14 @@ workflow ONTVAR {
         ch_bcftools_samples
     )
 
-    // Filtered merged summary
+    // Filtered summary - simple approach
+    filtered_summary_input = AF_FILTER.out.vcf
+        .map { meta, vcf -> vcf }
+        .collect()
+        .map { vcf_list -> tuple([id: "filtered_summary"], vcf_list) }
+
     SUMMARIZE_CALLER_MERGED_FILTERED(
-        AF_FILTER.out.vcf
-            .first()
-            .map { meta, vcf -> tuple([id: "filtered_summary"], vcf) },
+        filtered_summary_input,
         Channel.value("filtered")
     )
 

From 6163328e6ddee03d96b27502a840a44eefa83f0f Mon Sep 17 00:00:00 2001
From: manascripts <manas.sehgal@dkfz.de>
Date: Mon, 29 Sep 2025 14:51:58 +0200
Subject: [PATCH 08/11] fix EOF issues with pre-commit

---
 .github/workflows/nf-test.yml                 |  2 +-
 CITATIONS.md                                  |  2 -
 assets/schema_input.json                      |  2 +-
 conf/modules.config                           | 16 +++---
 docs/output.md                                |  3 --
 modules/local/filter_chr/environment.yml      |  4 +-
 modules/local/filter_chr/main.nf              |  2 +-
 modules/local/filter_chr/meta.yml             |  2 +-
 .../local/jasmine_header_fix/environment.yml  | 14 ++---
 modules/local/jasmine_header_fix/main.nf      |  4 +-
 modules/local/rename_vcf/environment.yml      |  2 +-
 modules/local/rename_vcf/main.nf              |  6 +--
 modules/local/rename_vcf/meta.yml             |  2 +-
 modules/local/rename_vcf_headers/README.md    |  6 +++
 .../local/rename_vcf_headers/environment.yml  |  2 +-
 modules/local/rename_vcf_headers/main.nf      |  3 +-
 modules/local/summarize_sv_counts/README.md   |  1 +
 .../local/summarize_sv_counts/environment.yml |  2 +-
 modules/local/summarize_sv_counts/main.nf     | 54 +++++++++----------
 modules/local/summarize_sv_counts/meta.yml    |  2 +-
 modules/nf-core/severus/environment.yml       | 16 +++---
 nextflow.config                               | 22 ++++----
 nextflow_schema.json                          | 14 ++---
 workflows/ontvar.nf                           | 39 +++++++-------
 24 files changed, 111 insertions(+), 111 deletions(-)

diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml
index 9d7642e..e7b5844 100755
--- a/.github/workflows/nf-test.yml
+++ b/.github/workflows/nf-test.yml
@@ -118,7 +118,7 @@ jobs:
   confirm-pass:
     needs: [nf-test]
     if: always()
-    runs-on:  # use self-hosted runners
+    runs-on: # use self-hosted runners
       - runs-on=${{ github.run_id }}-confirm-pass
       - runner=2cpu-linux-x64
     steps:
diff --git a/CITATIONS.md b/CITATIONS.md
index 647ec02..4972da7 100755
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -10,8 +10,6 @@
 
 ## Pipeline tools
 
-
-
 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
 
 > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 6ab045e..3d45c92 100755
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -33,4 +33,4 @@
         },
         "required": ["group_id", "sample_id", "sample_type", "bam_path"]
     }
-}
\ No newline at end of file
+}
diff --git a/conf/modules.config b/conf/modules.config
index 61d9a9f..1c71ef2 100755
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -133,13 +133,13 @@ process {
             path: { "${params.outdir}/case/01_raw_calls/severus" },
             mode: params.publish_dir_mode,
             pattern: '*.{vcf,vcf.gz}',
-            saveAs: { filename -> 
+            saveAs: { filename ->
                 // Get meta from task.ext
                 def meta = task.ext.meta ?: [:]
-                
+
                 // Determine if this is tumor/normal or tumor-only
                 def has_control = meta.has_control ?: false
-                
+
                 // Organize into subdirectories
                 def subdir = has_control ? "tumor_normal" : "tumor_only"
                 "${subdir}/${filename}"
@@ -256,7 +256,7 @@ process {
             path: { "${params.outdir}/case/02_caller_merged" },
             mode: params.publish_dir_mode,
             pattern: '*.{tsv,vcf}',
-            saveAs: { filename -> 
+            saveAs: { filename ->
                 def meta = task.ext.meta ?: [:]
                 def group_id = meta.sample ?: meta.id ?: "unknown"
                 filename.replaceAll(/^[^.]+/, "${group_id}")
@@ -297,7 +297,7 @@ process {
             path: { "${params.outdir}/case/03_caller_merged_filtered" },
             mode: params.publish_dir_mode,
             pattern: '*.{tsv,vcf}',
-            saveAs: { filename -> 
+            saveAs: { filename ->
                 def meta = task.ext.meta ?: [:]
                 def group_id = meta.sample ?: meta.id ?: "unknown"
                 filename.replaceAll(/^[^.]+/, "${group_id}")
@@ -329,7 +329,7 @@ process {
             enabled: false  // Intermediate step
         ]
     }
-    
+
     withName: 'SVDB_QUERY_COHORT' {
         cpus = 12
         memory = { 16.GB * task.attempt }
@@ -370,7 +370,7 @@ process {
             path: { "${params.outdir}/cohort" },
             mode: params.publish_dir_mode,
             pattern: '*.{tsv,vcf}',
-            saveAs: { filename -> 
+            saveAs: { filename ->
                 filename.replaceAll(/^[^.]+/, 'cohort')
             }
         ]
@@ -379,7 +379,7 @@ process {
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     // SUPPORTING FILES AND UTILITIES
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    
+
     withName: 'ANNOTSV_INSTALLANNOTATIONS' {
         cpus = 1
         memory = { 4.GB * task.attempt }
diff --git a/docs/output.md b/docs/output.md
index ef3340b..048256c 100755
--- a/docs/output.md
+++ b/docs/output.md
@@ -12,12 +12,9 @@ The directories listed below will be created in the results directory after the
 
 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
 
-
 - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
 
-
-
 ### MultiQC
 
 <details markdown="1">
diff --git a/modules/local/filter_chr/environment.yml b/modules/local/filter_chr/environment.yml
index d945fdc..0a84f41 100644
--- a/modules/local/filter_chr/environment.yml
+++ b/modules/local/filter_chr/environment.yml
@@ -3,5 +3,5 @@ channels:
   - bioconda
   - conda-forge
 dependencies:
-  - htslib   # for bgzip and zcat
-  - awk
\ No newline at end of file
+  - htslib # for bgzip and zcat
+  - awk
diff --git a/modules/local/filter_chr/main.nf b/modules/local/filter_chr/main.nf
index 829b24b..266da5b 100644
--- a/modules/local/filter_chr/main.nf
+++ b/modules/local/filter_chr/main.nf
@@ -25,4 +25,4 @@ process FILTER_CHR {
         awk 'BEGIN{OFS="\\t"} /^#/ {print} /^chr([1-9]|1[0-9]|2[0-2]|X|Y|M)\\t/ {print}' ${vcf} | bgzip > ${vcf.baseName}.filtered.vcf.gz
     fi
     """
-}
\ No newline at end of file
+}
diff --git a/modules/local/filter_chr/meta.yml b/modules/local/filter_chr/meta.yml
index bd6592b..0d54fb2 100644
--- a/modules/local/filter_chr/meta.yml
+++ b/modules/local/filter_chr/meta.yml
@@ -28,4 +28,4 @@ output:
       description: |
         The gzipped VCF file containing only chr1-22, chrX, chrY, and chrM entries.
 authors:
-  - github.com/manascripts
\ No newline at end of file
+  - github.com/manascripts
diff --git a/modules/local/jasmine_header_fix/environment.yml b/modules/local/jasmine_header_fix/environment.yml
index 1661bc9..9ca8738 100644
--- a/modules/local/jasmine_header_fix/environment.yml
+++ b/modules/local/jasmine_header_fix/environment.yml
@@ -1,10 +1,10 @@
 name: jasmine_header_fix
 channels:
-- conda-forge
-- bioconda
+  - conda-forge
+  - bioconda
 dependencies:
-- bioconda::bcftools=1.22
-- bioconda::coreutils=8.25
-- bioconda::htslib=1.22.1
-- conda-forge::awk=20250116
-- conda-forge::bash=5.2.37
\ No newline at end of file
+  - bioconda::bcftools=1.22
+  - bioconda::coreutils=8.25
+  - bioconda::htslib=1.22.1
+  - conda-forge::awk=20250116
+  - conda-forge::bash=5.2.37
diff --git a/modules/local/jasmine_header_fix/main.nf b/modules/local/jasmine_header_fix/main.nf
index d182591..32090e9 100644
--- a/modules/local/jasmine_header_fix/main.nf
+++ b/modules/local/jasmine_header_fix/main.nf
@@ -23,7 +23,7 @@ process JASMINE_HEADER_FIX {
 
     when:
       task.ext.when == null || task.ext.when
-        
+
     script:
     """
     set -euo pipefail
@@ -33,7 +33,7 @@ process JASMINE_HEADER_FIX {
     export TMP=\$PWD
     export TEMP=\$PWD
     export BCFTOOLS_PLUGINS=""
-    
+
     # Create a local temp directory
     mkdir -p \$PWD/tmp
     export TMPDIR=\$PWD/tmp
diff --git a/modules/local/rename_vcf/environment.yml b/modules/local/rename_vcf/environment.yml
index b0a213c..ae08d43 100755
--- a/modules/local/rename_vcf/environment.yml
+++ b/modules/local/rename_vcf/environment.yml
@@ -2,4 +2,4 @@ name: rename_vcf
 channels:
   - defaults
 dependencies:
-  - coreutils
\ No newline at end of file
+  - coreutils
diff --git a/modules/local/rename_vcf/main.nf b/modules/local/rename_vcf/main.nf
index d1e7603..100b08a 100755
--- a/modules/local/rename_vcf/main.nf
+++ b/modules/local/rename_vcf/main.nf
@@ -4,10 +4,10 @@ process RENAME_VCF {
 
     input:
       tuple val(meta), path(vcf), val(suffix)
-    
+
     output:
       tuple val(meta), path("${meta.id}_${suffix}.vcf")
-    
+
     when:
       task.ext.when == null || task.ext.when
 
@@ -15,4 +15,4 @@ process RENAME_VCF {
       """
       cp ${vcf} ${meta.id}_${suffix}.vcf
       """
-}
\ No newline at end of file
+}
diff --git a/modules/local/rename_vcf/meta.yml b/modules/local/rename_vcf/meta.yml
index ed1142e..e94bcc4 100755
--- a/modules/local/rename_vcf/meta.yml
+++ b/modules/local/rename_vcf/meta.yml
@@ -29,4 +29,4 @@ output:
       description: |
         The symlinked or renamed VCF file, named as <sample_id>_suffix.vcf.
 authors:
-  - github.com/manascripts
\ No newline at end of file
+  - github.com/manascripts
diff --git a/modules/local/rename_vcf_headers/README.md b/modules/local/rename_vcf_headers/README.md
index 7c26f96..c39fe83 100644
--- a/modules/local/rename_vcf_headers/README.md
+++ b/modules/local/rename_vcf_headers/README.md
@@ -1,20 +1,26 @@
 # rename_vcf_headers
 
 ## Description
+
 This module renames the headers in VCF files to include the correct sample names. It ensures compatibility with downstream tools that require properly formatted VCF headers.
 
 ## Inputs
+
 - **meta**: Metadata containing the sample name.
 - **vcf**: Input VCF file.
 
 ## Outputs
+
 - **renamed_vcf**: VCF file with updated headers.
 
 ## Usage
+
 Include this module in your Nextflow workflow and provide the required inputs. The module uses `awk` to modify the VCF headers.
 
 ## Authors
+
 - Manas Sehgal
 
 ## Requirements
+
 - `awk`
diff --git a/modules/local/rename_vcf_headers/environment.yml b/modules/local/rename_vcf_headers/environment.yml
index 40a2916..25122bf 100755
--- a/modules/local/rename_vcf_headers/environment.yml
+++ b/modules/local/rename_vcf_headers/environment.yml
@@ -3,4 +3,4 @@ channels:
   - defaults
 dependencies:
   - coreutils
-  - htslib
\ No newline at end of file
+  - htslib
diff --git a/modules/local/rename_vcf_headers/main.nf b/modules/local/rename_vcf_headers/main.nf
index c6fef4c..2287331 100644
--- a/modules/local/rename_vcf_headers/main.nf
+++ b/modules/local/rename_vcf_headers/main.nf
@@ -11,7 +11,7 @@ process RENAME_VCF_HEADERS {
 
     input:
     tuple val(meta), path(vcf)
-    
+
     output:
     tuple val(meta), path(vcf)
 
@@ -27,4 +27,3 @@ process RENAME_VCF_HEADERS {
     fi
     """
 }
-
diff --git a/modules/local/summarize_sv_counts/README.md b/modules/local/summarize_sv_counts/README.md
index 7985b5b..7ded004 100755
--- a/modules/local/summarize_sv_counts/README.md
+++ b/modules/local/summarize_sv_counts/README.md
@@ -1,2 +1,3 @@
 # summarize_sv_counts
+
 This module parses structural variant VCFs and generates a per-sample TSV summarizing SV counts by type.
diff --git a/modules/local/summarize_sv_counts/environment.yml b/modules/local/summarize_sv_counts/environment.yml
index 6e52ba2..bd40380 100755
--- a/modules/local/summarize_sv_counts/environment.yml
+++ b/modules/local/summarize_sv_counts/environment.yml
@@ -2,4 +2,4 @@ name: summarize_sv_counts
 channels:
   - defaults
 dependencies:
-  - coreutils
\ No newline at end of file
+  - coreutils
diff --git a/modules/local/summarize_sv_counts/main.nf b/modules/local/summarize_sv_counts/main.nf
index 0d5fb80..0c8c836 100755
--- a/modules/local/summarize_sv_counts/main.nf
+++ b/modules/local/summarize_sv_counts/main.nf
@@ -3,7 +3,7 @@ process SUMMARIZE_SV_COUNTS {
     label 'process_low'
 
     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/19/198b15b0581f19cfa29c5ff506b138aeb1bedb226d2ca308c46705bab133c1f0/data' :
         'community.wave.seqera.io/library/bcftools_coreutils_gawk_gzip_pruned:e1a91ca0c5f22302' }"
 
@@ -28,31 +28,31 @@ def analyze_vcf(vcf_file):
         "total_variants": 0,
         "sv_types": defaultdict(lambda: {"count": 0, "svlen_data": []})
     }
-    
+
     if not vcf_file or not os.path.exists(vcf_file):
         return result
-    
+
     try:
         with open(vcf_file, 'r') as f:
             for line in f:
                 if line.startswith('#'):
                     continue
-                
+
                 fields = line.strip().split('\\t')
                 if len(fields) < 8:
                     continue
-                
+
                 result["total_variants"] += 1
                 info = fields[7]
-                
+
                 svtype = "UNK"
                 for item in info.split(';'):
                     if item.startswith('SVTYPE='):
                         svtype = item.split('=')[1]
                         break
-                
+
                 result["sv_types"][svtype]["count"] += 1
-                
+
                 for item in info.split(';'):
                     if item.startswith('SVLEN='):
                         try:
@@ -61,10 +61,10 @@ def analyze_vcf(vcf_file):
                         except:
                             pass
                         break
-    
+
     except Exception as e:
         print(f"Error processing {vcf_file}: {e}")
-    
+
     # Compute statistics
     for svtype, data in result["sv_types"].items():
         if data["svlen_data"]:
@@ -75,7 +75,7 @@ def analyze_vcf(vcf_file):
             data["svlen_median"] = round(statistics.median(lengths), 1)
             data["svlen_stdev"] = round(statistics.stdev(lengths) if len(lengths) > 1 else 0, 2)
         del data["svlen_data"]
-    
+
     return result
 
 # Get inputs
@@ -92,7 +92,7 @@ for vcf_file in vcf_list:
         sample_id = basename.split('_')[0]  # e.g., SAMPLE_caller.vcf
     else:
         sample_id = basename.split('.')[0]   # fallback to full basename without extension
-    
+
     if sample_id not in sample_data:
         sample_data[sample_id] = []
     sample_data[sample_id].append(vcf_file)
@@ -107,11 +107,11 @@ if use_sample_grouping:
         ("analysis_type", "multi_sample"),
         ("samples", {})
     ])
-    
+
     for sample_id, sample_vcfs in sample_data.items():
         print(f"Processing sample: {sample_id}")
         sample_vcfs_list = sample_vcfs if isinstance(sample_vcfs, list) else [sample_vcfs]
-        
+
         if len(sample_vcfs_list) > 1:
             # Multi-caller for this sample
             sample_result = OrderedDict([
@@ -122,7 +122,7 @@ if use_sample_grouping:
                     "sv_types": defaultdict(lambda: {"count": 0, "svlen_data": []})
                 })
             ])
-            
+
             caller_map = {}
             for vcf_file in sample_vcfs_list:
                 vcf_name = os.path.basename(str(vcf_file)).lower()
@@ -134,17 +134,17 @@ if use_sample_grouping:
                     caller_map[vcf_file] = 'severus'
                 else:
                     caller_map[vcf_file] = 'unknown'
-            
+
             for vcf_file, caller in caller_map.items():
                 analyzed_data = analyze_vcf(str(vcf_file))
                 sample_result["callers"][caller] = analyzed_data
-                
+
                 sample_result["combined_stats"]["total_variants"] += analyzed_data["total_variants"]
                 for svtype, data in analyzed_data["sv_types"].items():
                     sample_result["combined_stats"]["sv_types"][svtype]["count"] += data["count"]
                     if "svlen_data" in data:
                         sample_result["combined_stats"]["sv_types"][svtype]["svlen_data"].extend(data["svlen_data"])
-            
+
             # Compute combined stats
             for svtype, data in sample_result["combined_stats"]["sv_types"].items():
                 if data["svlen_data"]:
@@ -155,7 +155,7 @@ if use_sample_grouping:
                     data["svlen_median"] = round(statistics.median(lengths), 1)
                     data["svlen_stdev"] = round(statistics.stdev(lengths) if len(lengths) > 1 else 0, 2)
                     del data["svlen_data"]
-            
+
         else:
             # Single VCF for this sample
             analyzed_data = analyze_vcf(str(sample_vcfs_list[0]))
@@ -164,7 +164,7 @@ if use_sample_grouping:
                 ("total_variants", analyzed_data["total_variants"]),
                 ("sv_types", analyzed_data["sv_types"])
             ])
-        
+
         result["samples"][sample_id] = sample_result
 
 else:
@@ -180,7 +180,7 @@ else:
                 "sv_types": defaultdict(lambda: {"count": 0, "svlen_data": []})
             })
         ])
-        
+
         caller_map = {}
         for vcf_file in vcf_list:
             vcf_name = os.path.basename(vcf_file).lower()
@@ -192,17 +192,17 @@ else:
                 caller_map[vcf_file] = 'severus'
             else:
                 caller_map[vcf_file] = 'unknown'
-        
+
         for vcf_file, caller in caller_map.items():
             analyzed_data = analyze_vcf(vcf_file)
             result["callers"][caller] = analyzed_data
-            
+
             result["combined_stats"]["total_variants"] += analyzed_data["total_variants"]
             for svtype, data in analyzed_data["sv_types"].items():
                 result["combined_stats"]["sv_types"][svtype]["count"] += data["count"]
                 if "svlen_data" in data:
                     result["combined_stats"]["sv_types"][svtype]["svlen_data"].extend(data["svlen_data"])
-        
+
         # Compute combined statistics
         for svtype, data in result["combined_stats"]["sv_types"].items():
             if data["svlen_data"]:
@@ -213,12 +213,12 @@ else:
                 data["svlen_median"] = round(statistics.median(lengths), 1)
                 data["svlen_stdev"] = round(statistics.stdev(lengths) if len(lengths) > 1 else 0, 2)
                 del data["svlen_data"]
-    
+
     else:
         # Single VCF analysis
         vcf_file = vcf_list[0] if vcf_list else ""
         analyzed_data = analyze_vcf(vcf_file)
-        
+
         result = OrderedDict([
             ("stage", stage),
             ("analysis_type", "single_vcf"),
@@ -248,4 +248,4 @@ EOF
         python: \$(python --version | sed 's/Python //')
     END_VERSIONS
     """
-}
\ No newline at end of file
+}
diff --git a/modules/local/summarize_sv_counts/meta.yml b/modules/local/summarize_sv_counts/meta.yml
index c24a203..44dda40 100755
--- a/modules/local/summarize_sv_counts/meta.yml
+++ b/modules/local/summarize_sv_counts/meta.yml
@@ -34,4 +34,4 @@ output:
         TSV file summarizing SV type counts for the sample, named as <sample_id>_sv_summary.tsv.
 
 authors:
-  - github.com/manascripts
\ No newline at end of file
+  - github.com/manascripts
diff --git a/modules/nf-core/severus/environment.yml b/modules/nf-core/severus/environment.yml
index 74eb305..e824160 100755
--- a/modules/nf-core/severus/environment.yml
+++ b/modules/nf-core/severus/environment.yml
@@ -1,12 +1,12 @@
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
 channels:
-- conda-forge
-- bioconda
+  - conda-forge
+  - bioconda
 dependencies:
-- bioconda::bcftools=1.22
-- bioconda::coreutils=8.25
-- conda-forge::gawk=5.3.1
-- conda-forge::gzip=1.14
-- conda-forge::jq=1.8.1
-- conda-forge::python=3.13.0
+  - bioconda::bcftools=1.22
+  - bioconda::coreutils=8.25
+  - conda-forge::gawk=5.3.1
+  - conda-forge::gzip=1.14
+  - conda-forge::jq=1.8.1
+  - conda-forge::python=3.13.0
diff --git a/nextflow.config b/nextflow.config
index deea6b9..241e8ed 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -22,14 +22,14 @@ params {
     vntr_bed                   = "${projectDir}/assets/vntr/hg38.p14.trf.bed"
     pon_file                   = "${projectDir}/assets/pon/PoN_1000G_hg38_extended.tsv.gz"
     PON                        = "${projectDir}/assets/pon/PoN_1000G_hg38_extended.tsv.gz"
-    
+
     // SVDB databases
     svdb_databases             = [
         "${projectDir}/assets/svdb_databases/gnomad.v4.1.sv.sites.rmMissing.vcf.gz",
         "${projectDir}/assets/svdb_databases/UWONT_450_AF.vcf.gz"
     ]
     svdb_in_frq               = ['AF', 'PFneedLR']
-    svdb_out_frq              = ['AFgnomAD', 'AFneedLR']  
+    svdb_out_frq              = ['AFgnomAD', 'AFneedLR']
     svdb_in_occ               = ['AN', 'AN']
     svdb_out_occ              = ['ANgnomAD', 'ANneedLR']
 
@@ -41,7 +41,7 @@ params {
     cluster_merge_pos         = 100
     sniffles_args             = ""
 
-    // CuteSV individual parameters  
+    // CuteSV individual parameters
     cutesv_min_support        = 5
     min_size                  = 50
     cutesv_min_mapq           = 20
@@ -183,16 +183,16 @@ profiles {
         shifter.enabled         = false
         charliecloud.enabled    = false
         apptainer.enabled       = false
-        
+
         process {
             executor = 'lsf'
             scratch = '/omics/odcf/analysis/OE0415_projects/nb_lrs/LRS/2025-02-01_NB_LRS/05-analysis_tmp/nf-core-ontvar-test/tmp'
-            
+
             // Queue selection based on your cluster configuration
             queue = {
                 def hours = (task.time ?: 6.h).toHours()
-                
-                if (hours <= 10.0/60.0) { 
+
+                if (hours <= 10.0/60.0) {
                     'short'
                 } else if (hours <= 1.0) {  // 1 hour
                     'medium'
@@ -204,9 +204,9 @@ profiles {
                     'verylong'
                 }
             }
-            
+
             // Resource limits based on your cluster specs
-            memory = { 
+            memory = {
                 def mem = (task.memory ?: 8.GB).toGiga()
                 if (mem > 200) {
                     // highmem queue - up to 4TB available but no hard limit
@@ -217,7 +217,7 @@ profiles {
                 }
             }
             cpus = { Math.min(task.cpus ?: 2, 10) }  // Max 10 cores per your cluster config
-            time = { 
+            time = {
                 def hours = (task.ext.time ?: 10.h).toHours()  // Use task.ext.time or a fixed default
                 if (hours <= 10.0/60.0) {
                     '10.m'  // short queue
@@ -230,7 +230,7 @@ profiles {
                 }
             }
         }
-        
+
         executor {
             name = 'lsf'
             perTaskReserve = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index ff3b2fb..18a7fee 100755
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -134,7 +134,7 @@
             "properties": {
                 "svdb_databases": {
                     "type": "array",
-                    "items": {"type": "string"},
+                    "items": { "type": "string" },
                     "default": [
                         "${projectDir}/assets/svdb_databases/gnomad.v4.1.sv.sites.rmMissing.vcf.gz",
                         "${projectDir}/assets/svdb_databases/UWONT_450_AF.vcf.gz"
@@ -144,28 +144,28 @@
                 },
                 "svdb_in_frq": {
                     "type": "array",
-                    "items": {"type": "string"},
+                    "items": { "type": "string" },
                     "default": ["AF", "PFneedLR"],
                     "description": "The frequency count tags in the database INFO columns.",
                     "fa_icon": "fas fa-tags"
                 },
                 "svdb_out_frq": {
                     "type": "array",
-                    "items": {"type": "string"},
+                    "items": { "type": "string" },
                     "default": ["AFgnomAD", "AFneedLR"],
                     "description": "The frequency count tags to be included in the output.",
                     "fa_icon": "fas fa-tags"
                 },
                 "svdb_in_occ": {
                     "type": "array",
-                    "items": {"type": "string"},
+                    "items": { "type": "string" },
                     "default": ["AN", "AN"],
                     "description": "The allele count tags in the database INFO columns.",
                     "fa_icon": "fas fa-tags"
                 },
                 "svdb_out_occ": {
                     "type": "array",
-                    "items": {"type": "string"},
+                    "items": { "type": "string" },
                     "default": ["ANgnomAD", "ANneedLR"],
                     "description": "The allele count tags to be included in the output.",
                     "fa_icon": "fas fa-tags"
@@ -530,7 +530,7 @@
                 },
                 "false_positive_snv": {
                     "type": ["string", "null"],
-                    "format": "file-path", 
+                    "format": "file-path",
                     "default": null,
                     "description": "Path to false positive SNV file for AnnotSV.",
                     "fa_icon": "fas fa-file-alt"
@@ -775,4 +775,4 @@
             "$ref": "#/$defs/generic_options"
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/workflows/ontvar.nf b/workflows/ontvar.nf
index 88c066b..74dbe26 100755
--- a/workflows/ontvar.nf
+++ b/workflows/ontvar.nf
@@ -21,8 +21,8 @@ include { JASMINE_HEADER_FIX } from '../modules/local/jasmine_header_fix/main'
 include { JASMINESV as JASMINESV_COHORT } from '../modules/nf-core/jasminesv/main'
 include { FILTER_CHR } from '../modules/local/filter_chr/main'
 include { ANNOTSV_ANNOTSV as ANNOTSV_COHORT    } from '../modules/nf-core/annotsv/annotsv/main'
-include { ANNOTSV_ANNOTSV as ANNOTSV_PER_SAMPLE_RAW    } from '../modules/nf-core/annotsv/annotsv/main' 
-include { ANNOTSV_ANNOTSV as ANNOTSV_PER_SAMPLE    } from '../modules/nf-core/annotsv/annotsv/main' 
+include { ANNOTSV_ANNOTSV as ANNOTSV_PER_SAMPLE_RAW    } from '../modules/nf-core/annotsv/annotsv/main'
+include { ANNOTSV_ANNOTSV as ANNOTSV_PER_SAMPLE    } from '../modules/nf-core/annotsv/annotsv/main'
 include { ANNOTSV_INSTALLANNOTATIONS } from '../modules/nf-core/annotsv/installannotations/main'
 include { UNTAR as UNTAR_ANNOTSV } from '../modules/nf-core/untar/main'
 include { SUMMARIZE_SV_COUNTS as SUMMARIZE_CALLERS          } from '../modules/local/summarize_sv_counts/main'
@@ -69,7 +69,7 @@ workflow ONTVAR {
     sv_input = cases
         .map { c -> tuple(c[0], c[0], c[3]) }  // map to [group_id, group_id, bam_path] - using group_id as sample name
         .join(controls_by_match, remainder: true)  // join on group_id
-        .map { it -> 
+        .map { it ->
             def group_id = it[0]  // Use group_id as sample identifier
             def case_bam = it[2]
             def control_bam = it[3]  // control_bam comes from join
@@ -94,14 +94,14 @@ workflow ONTVAR {
         Channel.value(true),                                                    // Input 4: vcf_output
         Channel.value(false)                                                    // Input 5: snf_output
     )
-    
+
     // CUTESV
     cutesv_input = sv_input
         .map { it ->
             def group_id = it[0]
             tuple([id: "${group_id}", sample: group_id], it[1], file("${it[1]}.bai"))
         }
-    
+
     CUTESV(
         cutesv_input,
         Channel.value(tuple([id: "reference"], file(reference)))
@@ -115,19 +115,19 @@ workflow ONTVAR {
             def control_bam = it[2]
             tuple([id: group_id, sample: group_id, has_control: true, control_bam: control_bam], case_bam, file("${case_bam}.bai"), control_bam, file("${control_bam}.bai"), [])
         }
-    
+
     severus_no_control_input = sv_input.filter { !it[2] }
         .map { it ->
             def group_id = it[0]
             def case_bam = it[1]
             tuple([id: group_id, sample: group_id, has_control: false], case_bam, file("${case_bam}.bai"), [], [], [])
         }
-    
+
     SEVERUS_WITH_CONTROL(
         severus_with_control_input,                                             // Input 1: [meta, target_bam, target_bai, control_bam, control_bai, vcf]
         Channel.value(tuple([id: "vntr"], file(params.vntr_bed)))               // Input 2: [meta, vntr_bed]
     )
-    
+
     SEVERUS_NO_CONTROL(
         severus_no_control_input,                                               // Input 1: [meta, target_bam, target_bai, control_bam, control_bai, vcf]
         Channel.value(tuple([id: "vntr"], file(params.vntr_bed)))               // Input 2: [meta, vntr_bed]
@@ -135,8 +135,8 @@ workflow ONTVAR {
 
     severus_vcfs = SEVERUS_WITH_CONTROL.out.somatic_vcf
         .mix(SEVERUS_NO_CONTROL.out.somatic_vcf)
-        .map { meta, vcf -> 
-            tuple(meta, vcf, 'severus') 
+        .map { meta, vcf ->
+            tuple(meta, vcf, 'severus')
         } | RENAME_VCF
 
     // ──────────────────────────────────────────────────────────────────────
@@ -171,19 +171,19 @@ workflow ONTVAR {
     // ──────────────────────────────────────────────────────────────────────
 
     sv_calls_by_sample = all_caller_vcfs
-        .map { meta, vcf -> 
+        .map { meta, vcf ->
             def group_id = meta.sample ?: meta.id  // Use sample field first, fallback to id
             tuple(group_id, vcf)
         }
         .groupTuple(by: 0)
 
     // ──────────────────────────────────────────────────────────────────────
-    // Run Jasmine to merge SVs from callers per sample  
+    // Run Jasmine to merge SVs from callers per sample
     // ──────────────────────────────────────────────────────────────────────
 
     jasminesv_sample_input = sv_calls_by_sample
         .filter { meta, vcf_list -> vcf_list.size() > 0 }
-        .map { meta, vcf_list -> 
+        .map { meta, vcf_list ->
             def group_id = meta
             tuple([id: group_id, sample: group_id, step: "consensus"], vcf_list, [], [])
         }
@@ -234,7 +234,7 @@ workflow ONTVAR {
         def idx_out = idx.exists() ? idx : []
         tuple(updated_meta, file(v), idx_out)
     }
-    
+
     CALLER_SUPPORT_FILTER(
         bcftools_sample_input,
         Channel.value([]), // regions
@@ -256,7 +256,7 @@ workflow ONTVAR {
     // ──────────────────────────────────────────────────────────────────────
     // SAMPLE LEVEL AF ANNOTATION + FILTERING + ANNOTSV ANNOTATION
     // ──────────────────────────────────────────────────────────────────────
-    
+
     ch_per_sample_input = CALLER_SUPPORT_FILTER.out.vcf
         .map { meta, vcf -> tuple(meta, vcf) }
 
@@ -285,7 +285,7 @@ workflow ONTVAR {
         def idx_out = idx.exists() ? idx : []
         tuple(updated_meta, file(annotated_vcf), idx_out)
     }
-    
+
     ch_bcftools_regions = Channel.value([])
     ch_bcftools_targets = Channel.value([])
     ch_bcftools_samples = Channel.value([])
@@ -345,7 +345,7 @@ workflow ONTVAR {
 
     ANNOTSV_PER_SAMPLE(
         AF_FILTER.out.vcf
-            .map { meta, vcf -> 
+            .map { meta, vcf ->
                 def updated_meta = [id: meta.sample ?: meta.id, sample: meta.sample ?: meta.id, step: "final_annotation"]
                 tuple(updated_meta, vcf, [], [])
             },
@@ -364,7 +364,7 @@ workflow ONTVAR {
         .collect()
 
     jasminesv_cohort_input = sample_consensus_vcfs
-        .map { vcf_list -> 
+        .map { vcf_list ->
         tuple([id: "cohort"], vcf_list, [], [])
     }
 
@@ -451,7 +451,7 @@ workflow ONTVAR {
     // ──────────────────────────────────────────────────────────────────────
     // Collate and save software versions
     // ──────────────────────────────────────────────────────────────────────
-    
+
     ch_versions = ch_versions.mix(SNIFFLES.out.versions)
     ch_versions = ch_versions.mix(CUTESV.out.versions)
     ch_versions = ch_versions.mix(SEVERUS_WITH_CONTROL.out.versions)
@@ -527,4 +527,3 @@ workflow ONTVAR {
     THE END
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
-

From 9b6dc90881eac15c95600cfa6f346bee1d0a9b76 Mon Sep 17 00:00:00 2001
From: manascripts <manas.sehgal@dkfz.de>
Date: Mon, 29 Sep 2025 15:03:42 +0200
Subject: [PATCH 09/11] remove pon_file alias

---
 nextflow.config      | 1 -
 nextflow_schema.json | 9 +--------
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 241e8ed..194a8db 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -20,7 +20,6 @@ params {
     // File paths
     tandem_repeats             = "${projectDir}/assets/vntr/hg38.p14.trf.bed"
     vntr_bed                   = "${projectDir}/assets/vntr/hg38.p14.trf.bed"
-    pon_file                   = "${projectDir}/assets/pon/PoN_1000G_hg38_extended.tsv.gz"
     PON                        = "${projectDir}/assets/pon/PoN_1000G_hg38_extended.tsv.gz"
 
     // SVDB databases
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 18a7fee..79d15f2 100755
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -110,18 +110,11 @@
                     "description": "Path to VNTR BED file for Severus.",
                     "fa_icon": "fas fa-file-alt"
                 },
-                "pon_file": {
-                    "type": "string",
-                    "format": "file-path",
-                    "default": "${projectDir}/assets/pon/PoN_1000G_hg38_extended.tsv.gz",
-                    "description": "Panel of Normals file.",
-                    "fa_icon": "fas fa-database"
-                },
                 "PON": {
                     "type": "string",
                     "format": "file-path",
                     "default": "${projectDir}/assets/pon/PoN_1000G_hg38_extended.tsv.gz",
-                    "description": "Panel of Normals file (alias for pon_file).",
+                    "description": "Panel of Normals file for filtering.",
                     "fa_icon": "fas fa-database"
                 }
             }

From 858ed39b0e357b3cf24f615248e3fb1fb84a1d33 Mon Sep 17 00:00:00 2001
From: manascripts <manas.sehgal@dkfz.de>
Date: Mon, 29 Sep 2025 15:07:33 +0200
Subject: [PATCH 10/11] update -SVminSize in annotsv

---
 conf/modules.config  | 6 +++---
 nextflow.config      | 2 +-
 nextflow_schema.json | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 1c71ef2..228bf55 100755
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -247,7 +247,7 @@ process {
         cpus = 3
         memory = { 8.GB * task.attempt }
         ext.args = {
-            def base_args = "-genomeBuild ${params.genome_build} -vcf ${params.output_vcf} -SVminSize ${params.min_sv_size}"
+            def base_args = "-genomeBuild ${params.genome_build} -vcf ${params.output_vcf} -SVminSize ${params.annotsv_min_sv_size}"
             def user_args = params.annotsv_args ? params.annotsv_args : ""
             return "${base_args} ${user_args}".trim()
         }
@@ -288,7 +288,7 @@ process {
         cpus = 3
         memory = { 8.GB * task.attempt }
         ext.args = {
-            def base_args = "-genomeBuild ${params.genome_build} -vcf ${params.output_vcf} -SVminSize ${params.min_sv_size}"
+            def base_args = "-genomeBuild ${params.genome_build} -vcf ${params.output_vcf} -SVminSize ${params.annotsv_min_sv_size}"
             def user_args = params.annotsv_args ? params.annotsv_args : ""
             return "${base_args} ${user_args}".trim()
         }
@@ -362,7 +362,7 @@ process {
         cpus = 6
         memory = 8.GB
         ext.args = {
-            def base_args = "-genomeBuild ${params.genome_build} -vcf ${params.output_vcf} -SVminSize ${params.min_sv_size}"
+            def base_args = "-genomeBuild ${params.genome_build} -vcf ${params.output_vcf} -SVminSize ${params.annotsv_min_sv_size}"
             def user_args = params.annotsv_args ? params.annotsv_args : ""
             return "${base_args} ${user_args}".trim()
         }
diff --git a/nextflow.config b/nextflow.config
index 194a8db..7c2296e 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -88,7 +88,7 @@ params {
     // AnnotSV individual parameters
     genome_build              = "GRCh38"
     output_vcf                = 1
-    min_sv_size               = 0
+    annotsv_min_sv_size       = 0
     annotsv_args              = ""
 
     // References
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 79d15f2..0005ccd 100755
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -542,7 +542,7 @@
                     "description": "Output VCF format in addition to TSV (1=yes, 0=no).",
                     "fa_icon": "fas fa-file-code"
                 },
-                "min_sv_size": {
+                "annotsv_min_sv_size": {
                     "type": "integer",
                     "default": 0,
                     "minimum": 0,

From 1ce1abcfcceccb693833053420b8b41bfd627342 Mon Sep 17 00:00:00 2001
From: manascripts <manas.sehgal@dkfz.de>
Date: Wed, 1 Oct 2025 11:02:12 +0200
Subject: [PATCH 11/11] fix annotsv outputs and remove -C from
 jasmine_header_fix bash shebang

---
 conf/modules.config | 33 +++++++++++++++++++++++++++++++--
 workflows/ontvar.nf | 34 ++++++++++++++++++++++++++--------
 2 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 228bf55..2e9471c 100755
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -330,6 +330,24 @@ process {
         ]
     }
 
+    withName: 'ANNOTSV_COHORT_RAW' {
+        cpus = 6
+        memory = 8.GB
+        ext.args = {
+            def base_args = "-genomeBuild ${params.genome_build} -vcf ${params.output_vcf} -SVminSize ${params.annotsv_min_sv_size}"
+            def user_args = params.annotsv_args ? params.annotsv_args : ""
+            return "${base_args} ${user_args}".trim()
+        }
+        publishDir = [
+            path: { "${params.outdir}/cohort" },
+            mode: params.publish_dir_mode,
+            pattern: '*.{tsv,vcf}',
+            saveAs: { filename ->
+                filename.replaceAll(/^[^.]+/, 'cohort_annotated')
+            }
+        ]
+    }
+
     withName: 'SVDB_QUERY_COHORT' {
         cpus = 12
         memory = { 16.GB * task.attempt }
@@ -371,7 +389,7 @@ process {
             mode: params.publish_dir_mode,
             pattern: '*.{tsv,vcf}',
             saveAs: { filename ->
-                filename.replaceAll(/^[^.]+/, 'cohort')
+                filename.replaceAll(/^[^.]+/, 'cohort_filtered')
             }
         ]
     }
@@ -431,7 +449,18 @@ process {
     }
 
     // Cohort summary (in cohort/)
-    withName: 'SUMMARIZE_COHORT' {
+    withName: 'SUMMARIZE_COHORT_ANNOTATED' {
+        cpus = 1
+        memory = { 3.GB * task.attempt }
+        publishDir = [
+            path: { "${params.outdir}/cohort" },
+            mode: params.publish_dir_mode,
+            pattern: '*_summary.json'
+        ]
+    }
+
+    // Cohort filtered summary (in cohort/)
+    withName: 'SUMMARIZE_COHORT_FILTERED' {
         cpus = 1
         memory = { 3.GB * task.attempt }
         publishDir = [
diff --git a/workflows/ontvar.nf b/workflows/ontvar.nf
index 74dbe26..fa2c475 100755
--- a/workflows/ontvar.nf
+++ b/workflows/ontvar.nf
@@ -20,6 +20,7 @@ include { JASMINESV as JASMINESV_SAMPLE } from '../modules/nf-core/jasminesv/mai
 include { JASMINE_HEADER_FIX } from '../modules/local/jasmine_header_fix/main'
 include { JASMINESV as JASMINESV_COHORT } from '../modules/nf-core/jasminesv/main'
 include { FILTER_CHR } from '../modules/local/filter_chr/main'
+include { ANNOTSV_ANNOTSV as ANNOTSV_COHORT_RAW } from '../modules/nf-core/annotsv/annotsv/main'
 include { ANNOTSV_ANNOTSV as ANNOTSV_COHORT    } from '../modules/nf-core/annotsv/annotsv/main'
 include { ANNOTSV_ANNOTSV as ANNOTSV_PER_SAMPLE_RAW    } from '../modules/nf-core/annotsv/annotsv/main'
 include { ANNOTSV_ANNOTSV as ANNOTSV_PER_SAMPLE    } from '../modules/nf-core/annotsv/annotsv/main'
@@ -28,7 +29,8 @@ include { UNTAR as UNTAR_ANNOTSV } from '../modules/nf-core/untar/main'
 include { SUMMARIZE_SV_COUNTS as SUMMARIZE_CALLERS          } from '../modules/local/summarize_sv_counts/main'
 include { SUMMARIZE_SV_COUNTS as SUMMARIZE_CALLER_MERGED    } from '../modules/local/summarize_sv_counts/main'
 include { SUMMARIZE_SV_COUNTS as SUMMARIZE_CALLER_MERGED_FILTERED  } from '../modules/local/summarize_sv_counts/main'
-include { SUMMARIZE_SV_COUNTS as SUMMARIZE_COHORT    } from '../modules/local/summarize_sv_counts/main'
+include { SUMMARIZE_SV_COUNTS as SUMMARIZE_COHORT_ANNOTATED } from '../modules/local/summarize_sv_counts/main'
+include { SUMMARIZE_SV_COUNTS as SUMMARIZE_COHORT_FILTERED  } from '../modules/local/summarize_sv_counts/main'
 include { SVDB_QUERY as SVDB_QUERY_SAMPLE } from '../modules/nf-core/svdb/query/main'
 include { SVDB_QUERY as SVDB_QUERY_COHORT } from '../modules/nf-core/svdb/query/main'
 include { BCFTOOLS_VIEW as CALLER_SUPPORT_FILTER } from '../modules/nf-core/bcftools/view/main'
@@ -405,6 +407,26 @@ workflow ONTVAR {
         ch_svdb_cohort_bedpe
     )
 
+    ch_candidate_genes_cohort    = Channel.value(tuple([id: "candidate_genes"], []))
+    ch_false_positive_snv_cohort = Channel.value(tuple([id: "false_positive_snv"], []))
+    ch_gene_transcripts_cohort   = Channel.value(tuple([id: "gene_transcripts"], []))
+
+    ANNOTSV_COHORT_RAW(
+        SVDB_QUERY_COHORT.out.vcf
+            .map { meta, vcf -> tuple(meta, vcf, [], []) },
+        ch_annotsv_annotations,
+        ch_candidate_genes_cohort,
+        ch_false_positive_snv_cohort,
+        ch_gene_transcripts_cohort
+    )
+
+    // Cohort summaries - one per cohort directory
+    SUMMARIZE_COHORT_ANNOTATED(
+        SVDB_QUERY_COHORT.out.vcf
+            .map { meta, vcf -> tuple([id: "cohort_annotated_summary"], vcf) },
+        Channel.value("cohort_annotated")
+    )
+
     // ──────────────────────────────────────────────────────────────────────
     // Filter annotated SVs based on AF
     // ──────────────────────────────────────────────────────────────────────
@@ -421,10 +443,10 @@ workflow ONTVAR {
     )
 
     // Cohort summaries - one per cohort directory
-    SUMMARIZE_COHORT(
+    SUMMARIZE_COHORT_FILTERED(
         AF_FILTER_COHORT.out.vcf
-            .map { meta, vcf -> tuple([id: "cohort_summary"], vcf) },
-        Channel.value("cohort")
+            .map { meta, vcf -> tuple([id: "cohort_filtered_summary"], vcf) },
+        Channel.value("cohort_filtered")
     )
 
     // ──────────────────────────────────────────────────────────────────────
@@ -436,10 +458,6 @@ workflow ONTVAR {
         tuple(meta, filtered_vcf, [], [])
     }
 
-    ch_candidate_genes_cohort    = Channel.value(tuple([id: "candidate_genes"], []))
-    ch_false_positive_snv_cohort = Channel.value(tuple([id: "false_positive_snv"], []))
-    ch_gene_transcripts_cohort   = Channel.value(tuple([id: "gene_transcripts"], []))
-
     ANNOTSV_COHORT(
         annotsv_input,
         ch_annotsv_annotations,