From faf80d3e93bbc9a9a4e1ec90fdf75e57b4559820 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 20 Jun 2025 12:03:44 +0200 Subject: [PATCH 1/8] start converting test_microbial to nf-test --- tests/test_microbial.nf.test | 149 ++++++ tests/test_microbial.nf.test.snap | 754 ++++++++++++++++++++++++++++++ 2 files changed, 903 insertions(+) create mode 100644 tests/test_microbial.nf.test create mode 100644 tests/test_microbial.nf.test.snap diff --git a/tests/test_microbial.nf.test b/tests/test_microbial.nf.test new file mode 100644 index 000000000..a9625c70f --- /dev/null +++ b/tests/test_microbial.nf.test @@ -0,0 +1,149 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_EAGER" + script "main.nf" + tag "pipeline" + tag "nfcore_eager" + tag "test_microbial" + + test("test_microbial_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + + /////////////////// + // DOCUMENTATION // + /////////////////// + + // The contents of each top level results directory should be tested with individually named snapshots. + // Within each snapshot, there should be two to three distinct variables, that contain the files to be tested. + // - stable_name_ is for files with variable md5sums (i.e. content) so only names will be compared + // - stable_content_ is for files with stable md5sums (i.e. 
content) so md5sums will be compared + // - bams_ is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable) + // If a directory is fully stable, you can drop `stable_name_*` + // If a directory contains no BAMs, you can drop `bams_*` + + // Generate with: nf-test test --tag test_microbial --profile docker,test_microbial --update-snapshot + // Test with: nf-test test --tag test_microbial --profile docker,test_microbial + // NOTE: BAMs are always only stable in name, because: + // a) sharding breaks header since the shard that was first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112) + // b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order) + // point b) also causes BAIs to be unstable. + // c) Merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes) + + ////////////////////// + // DEFINE VARIABLES // + ////////////////////// + + // Define exclusion patterns for files with unstable contents + // NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here + // This is particularly important if the patterns excluded in the stable content section should be included in the stable name section + def unstable_patterns_auth = [ + '**/mapped_reads_gc-content_distribution.txt', + '**/mapped_reads_nucleotide_content.txt', + '**/genome_gc_content_per_window.png', + '**/*.{svg,pdf,html,png}', + '**/DamageProfiler.log', + '**/3p_freq_misincorporations.txt', + '**/5p_freq_misincorporations.txt', + '**/DNA_comp_genome.txt', + '**/DNA_composition_sample.txt', + '**/misincorporation.txt', + '**/genome_results.txt', + ] + + // Check that no files are missing/added + // Command legend: Result directory to index , includeDir: include dirs?, ignore: exclude patterns , ignoreFile: exclude pattern list , include: include 
patterns + def stable_name_all = getAllFilesFromDir("$outputDir/" , includeDir: false , ignore: ['pipeline_info/*'] , ignoreFile: null , include: ['*', '**/*'] ) + + // Authentication + def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] ) + def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth) + + // Deduplication - TODO -> snapshot both lists are empty!? + def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) + def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) + + // Final_bams + def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) + def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) + + // Mapping (incl. bam_input flagstat) + def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) + def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) + + // // Preprocessing + // // NOTE: FastQC html appears stable, but I worry it might just include a day timestamp instead of a full timestamp. To keep the expression simpler I removed both from checksum testing. 
+ def stable_content_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: ['**/*.{zip,log,html}'], ignoreFile: null , include: ['**/*'] ) + def stable_name_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{zip,log,html}'] ) + + // // Read filtering + def stable_content_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) + def stable_name_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) + + // // Genotyping + def stable_content_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: ['**/*.{tbi,vcf.gz}'] , ignoreFile: null , include: ['**/*'] ) + def stable_name_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.tbi'] ) + // We need to collect the vcfs separately to run more specific md5sum checks on the header (contents are unstable due to the same reasons as BAMs, explained above). 
+ def genotyping_vcfs = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.vcf.gz'] ) + + // // Metagenomics + def stable_content_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: ['**/*table.tsv'] , ignoreFile: null , include: ['**/*'] ) + def stable_name_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*table.tsv'] ) + + // MultiQC + def stable_name_multiqc = getAllFilesFromDir("$outputDir/multiqc" , includeDir: false , ignore: null , ignoreFile: null , include: ['*', '**/*'] ) + + /////////////////////// + // DEFINE ASSERTIONS // + /////////////////////// + + assertAll( + { assert workflow.success }, + // This checks that there are no missing or additional output files. + // Also a good starting point to look at all the files in the output folder that need to be checked in subsequent sections. + { assert snapshot( stable_name_all*.name ).match("all_files") }, + + // Checking changes to contents of each section + // NOTE: Keep the order of the sections in the alphanumeric order of the output directories. + // Each section should first check stable_content, stable_name second (if applicable). + { assert snapshot( stable_content_authentication , stable_name_authentication*.name ).match("authentication") }, + { assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") }, + { assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") }, + // NOTE: The snapshot section for mapping cannot be named 'mapping'. 
See https://github.com/askimed/nf-test/issues/279 + { assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") }, + { assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") }, + { assert snapshot( stable_content_readfiltering , stable_name_readfiltering*.name ).match("read_filtering") }, + { assert snapshot( stable_content_genotyping , stable_name_genotyping*.name ).match("genotyping") }, + // Additional checks on the genotyping VCFs for content. Specifically the md5sums of the header FORMAT, INFO, FILTER, CONTIG lines, and sample names + { assert snapshot( + genotyping_vcfs.collect { + file -> + def vcf_head = path(file.toString()).vcf.header + // The header contains lines in the "OTHER" category, which contain a timestamp and/or work dir paths, so we need to filter those out, then calculate md5sums. + def header_md5 = [ + vcf_head.getFormatHeaderLines().toString(), + vcf_head.getInfoHeaderLines().toString(), + vcf_head.getFilterLines().toString(), + vcf_head.getIDHeaderLines().toString(), + vcf_head.getGenotypeSamples().toString(), + vcf_head.getContigLines().toString(), + ].join(' ').md5() + file.getName() + ":header_md5," + header_md5 + } + ).match("genotyping_vcfs")}, + { assert snapshot( stable_content_metagenomics , stable_name_metagenomics*.name ).match("metagenomics") }, + { assert snapshot( stable_name_multiqc*.name ).match("multiqc") }, + + // Versions + { assert new File("$outputDir/pipeline_info/nf_core_eager_software_mqc_versions.yml").exists() }, + + ) + } + } +} diff --git a/tests/test_microbial.nf.test.snap b/tests/test_microbial.nf.test.snap new file mode 100644 index 000000000..f1a9a88f5 --- /dev/null +++ b/tests/test_microbial.nf.test.snap @@ -0,0 +1,754 @@ +{ + "all_files": { + "content": [ + [ + "JK2782_Mammoth_MT_Krause_depth.bed", + "3pGtoA_freq.txt", + "3p_freq_misincorporations.txt", + "5pCtoT_freq.txt", + "5p_freq_misincorporations.txt", + 
"DNA_comp_genome.txt", + "DNA_composition_sample.txt", + "DamagePlot.pdf", + "DamagePlot_five_prime.svg", + "DamagePlot_three_prime.svg", + "DamageProfiler.log", + "Length_plot.pdf", + "Length_plot_combined_data.svg", + "Length_plot_forward_reverse_separated.svg", + "dmgprof.json", + "editDistance.txt", + "edit_distance.pdf", + "edit_distance.svg", + "lgdistribution.txt", + "misincorporation.txt", + "3pGtoA_freq.txt", + "3p_freq_misincorporations.txt", + "5pCtoT_freq.txt", + "5p_freq_misincorporations.txt", + "DNA_comp_genome.txt", + "DNA_composition_sample.txt", + "DamagePlot.pdf", + "DamagePlot_five_prime.svg", + "DamagePlot_three_prime.svg", + "DamageProfiler.log", + "Length_plot.pdf", + "Length_plot_combined_data.svg", + "Length_plot_forward_reverse_separated.svg", + "dmgprof.json", + "editDistance.txt", + "edit_distance.pdf", + "edit_distance.svg", + "lgdistribution.txt", + "misincorporation.txt", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_percent_on_target_mqc.json", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_percent_on_target_mqc.json", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause.c_curve.txt", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause.command.log", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT.c_curve.txt", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT.command.log", + "agogo.css", + "ajax-loader.gif", + "basic.css", + "bgfooter.png", + "bgtop.png", + "comment-bright.png", + "comment-close.png", + "comment.png", + "doctools.js", + "down-pressed.png", + "down.png", + "file.png", + "jquery.js", + "minus.png", + "plus.png", + "pygments.css", + "qualimap_logo_small.png", + "report.css", + "searchtools.js", + "underscore.js", + "up-pressed.png", + "up.png", + "websupport.js", + "genome_results.txt", + "genome_coverage_0to50_histogram.png", + "genome_coverage_across_reference.png", + "genome_coverage_histogram.png", + "genome_coverage_quotes.png", + "genome_gc_content_per_window.png", + "genome_homopolymer_indels.png", + 
"genome_mapping_quality_across_reference.png", + "genome_mapping_quality_histogram.png", + "genome_reads_clipping_profile.png", + "genome_reads_content_per_read_position.png", + "genome_uniq_read_starts_histogram.png", + "qualimapReport.html", + "coverage_across_reference.txt", + "coverage_histogram.txt", + "duplication_rate_histogram.txt", + "genome_fraction_coverage.txt", + "homopolymer_indels.txt", + "mapped_reads_clipping_profile.txt", + "mapped_reads_gc-content_distribution.txt", + "mapped_reads_nucleotide_content.txt", + "mapping_quality_across_reference.txt", + "mapping_quality_histogram.txt", + "agogo.css", + "ajax-loader.gif", + "basic.css", + "bgfooter.png", + "bgtop.png", + "comment-bright.png", + "comment-close.png", + "comment.png", + "doctools.js", + "down-pressed.png", + "down.png", + "file.png", + "jquery.js", + "minus.png", + "plus.png", + "pygments.css", + "qualimap_logo_small.png", + "report.css", + "searchtools.js", + "underscore.js", + "up-pressed.png", + "up.png", + "websupport.js", + "genome_results.txt", + "qualimapReport.html", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_dedupped.bam", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_dedupped.bam.bai", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_dedupped.bam", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_dedupped.bam.bai", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_dedupped.flagstat", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_dedupped.flagstat", + "JK2782_Mammoth_MT_Krause.bam", + "JK2782_Mammoth_MT_Krause.bam.bai", + "JK2782_hs37d5_chr21-MT.bam", + "JK2782_hs37d5_chr21-MT.bam.bai", + "JK2782_Mammoth_MT_Krause.flagstat", + "JK2782_hs37d5_chr21-MT.flagstat", + "JK2782_Mammoth_MT_Krause.bcftools_stats.txt", + "JK2782_hs37d5_chr21-MT.bcftools_stats.txt", + "JK2782_Mammoth_MT_Krause.vcf.gz", + "JK2782_Mammoth_MT_Krause.vcf.gz.tbi", + "JK2782_hs37d5_chr21-MT.vcf.gz", + "JK2782_hs37d5_chr21-MT.vcf.gz.tbi", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_sorted.bam", + 
"JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_sorted.bam.bai", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_sorted.bam", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_sorted.bam.bai", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_sorted.flagstat", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_sorted.flagstat", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_metagenomics_fastq_unmapped_other.krakenuniq.report.txt", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_metagenomics_fastq_unmapped_other.krakenuniq.report.txt", + "krakenuniq_taxpasta_table.tsv", + "bcftools-stats-subtypes.txt", + "bcftools_stats_indel-lengths.txt", + "bcftools_stats_variant_depths.txt", + "bcftools_stats_vqc_Count_Indels.txt", + "bcftools_stats_vqc_Count_SNP.txt", + "bcftools_stats_vqc_Count_Transitions.txt", + "bcftools_stats_vqc_Count_Transversions.txt", + "fastp-insert-size-plot.txt", + "fastp-seq-content-gc-plot_Merged_and_filtered.txt", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt", + "fastp-seq-content-n-plot_Merged_and_filtered.txt", + "fastp-seq-content-n-plot_Read_1_Before_filtering.txt", + "fastp-seq-content-n-plot_Read_2_Before_filtering.txt", + "fastp-seq-quality-plot_Merged_and_filtered.txt", + "fastp-seq-quality-plot_Read_1_Before_filtering.txt", + "fastp-seq-quality-plot_Read_2_Before_filtering.txt", + "fastp_filtered_reads_plot.txt", + "fiveprime_misinc_plot.txt", + "length-distribution-Forward.txt", + "length-distribution-Reverse.txt", + "mapdamage-fiveprime_misinc_plot.txt", + "mapdamage-length-distribution-Forward.txt", + "mapdamage-length-distribution-Reverse.txt", + "mapdamage-threeprime_misinc_plot.txt", + "multiqc.log", + "multiqc_bcftools_stats.txt", + "multiqc_citations.txt", + "multiqc_damageprofiler_metrics.txt", + "multiqc_data.json", + "multiqc_fastp.txt", + "multiqc_general_stats.txt", + "multiqc_qualimap_bamqc_genome_results.txt", + "multiqc_samtools_flagstat.txt", + 
"multiqc_software_versions.txt", + "multiqc_sources.txt", + "preseq.txt", + "preseq_complexity_plot_molecules.txt", + "qualimap_coverage_histogram.txt", + "qualimap_gc_content.txt", + "qualimap_genome_fraction.txt", + "samtools-flagstat-dp_Percentage_of_total.txt", + "samtools-flagstat-dp_Read_counts.txt", + "threeprime_misinc_plot.txt", + "bcftools-stats-subtypes-cnt.pdf", + "bcftools-stats-subtypes-pct.pdf", + "bcftools_stats_indel-lengths.pdf", + "bcftools_stats_variant_depths.pdf", + "bcftools_stats_vqc_Count_Indels.pdf", + "bcftools_stats_vqc_Count_SNP.pdf", + "bcftools_stats_vqc_Count_Transitions.pdf", + "bcftools_stats_vqc_Count_Transversions.pdf", + "fastp-insert-size-plot.pdf", + "fastp-seq-content-gc-plot_Merged_and_filtered.pdf", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.pdf", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.pdf", + "fastp-seq-content-n-plot_Merged_and_filtered.pdf", + "fastp-seq-content-n-plot_Read_1_Before_filtering.pdf", + "fastp-seq-content-n-plot_Read_2_Before_filtering.pdf", + "fastp-seq-quality-plot_Merged_and_filtered.pdf", + "fastp-seq-quality-plot_Read_1_Before_filtering.pdf", + "fastp-seq-quality-plot_Read_2_Before_filtering.pdf", + "fastp_filtered_reads_plot-cnt.pdf", + "fastp_filtered_reads_plot-pct.pdf", + "fiveprime_misinc_plot.pdf", + "general_stats_table.pdf", + "length-distribution-Forward.pdf", + "length-distribution-Reverse.pdf", + "mapdamage-fiveprime_misinc_plot.pdf", + "mapdamage-length-distribution-Forward.pdf", + "mapdamage-length-distribution-Reverse.pdf", + "mapdamage-threeprime_misinc_plot.pdf", + "preseq_complexity_plot_molecules.pdf", + "qualimap_coverage_histogram.pdf", + "qualimap_gc_content.pdf", + "qualimap_genome_fraction.pdf", + "samtools-flagstat-dp_Percentage_of_total.pdf", + "samtools-flagstat-dp_Read_counts.pdf", + "threeprime_misinc_plot.pdf", + "bcftools-stats-subtypes-cnt.png", + "bcftools-stats-subtypes-pct.png", + "bcftools_stats_indel-lengths.png", + 
"bcftools_stats_variant_depths.png", + "bcftools_stats_vqc_Count_Indels.png", + "bcftools_stats_vqc_Count_SNP.png", + "bcftools_stats_vqc_Count_Transitions.png", + "bcftools_stats_vqc_Count_Transversions.png", + "fastp-insert-size-plot.png", + "fastp-seq-content-gc-plot_Merged_and_filtered.png", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.png", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.png", + "fastp-seq-content-n-plot_Merged_and_filtered.png", + "fastp-seq-content-n-plot_Read_1_Before_filtering.png", + "fastp-seq-content-n-plot_Read_2_Before_filtering.png", + "fastp-seq-quality-plot_Merged_and_filtered.png", + "fastp-seq-quality-plot_Read_1_Before_filtering.png", + "fastp-seq-quality-plot_Read_2_Before_filtering.png", + "fastp_filtered_reads_plot-cnt.png", + "fastp_filtered_reads_plot-pct.png", + "fiveprime_misinc_plot.png", + "general_stats_table.png", + "length-distribution-Forward.png", + "length-distribution-Reverse.png", + "mapdamage-fiveprime_misinc_plot.png", + "mapdamage-length-distribution-Forward.png", + "mapdamage-length-distribution-Reverse.png", + "mapdamage-threeprime_misinc_plot.png", + "preseq_complexity_plot_molecules.png", + "qualimap_coverage_histogram.png", + "qualimap_gc_content.png", + "qualimap_genome_fraction.png", + "samtools-flagstat-dp_Percentage_of_total.png", + "samtools-flagstat-dp_Read_counts.png", + "threeprime_misinc_plot.png", + "bcftools-stats-subtypes-cnt.svg", + "bcftools-stats-subtypes-pct.svg", + "bcftools_stats_indel-lengths.svg", + "bcftools_stats_variant_depths.svg", + "bcftools_stats_vqc_Count_Indels.svg", + "bcftools_stats_vqc_Count_SNP.svg", + "bcftools_stats_vqc_Count_Transitions.svg", + "bcftools_stats_vqc_Count_Transversions.svg", + "fastp-insert-size-plot.svg", + "fastp-seq-content-gc-plot_Merged_and_filtered.svg", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.svg", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.svg", + "fastp-seq-content-n-plot_Merged_and_filtered.svg", + 
"fastp-seq-content-n-plot_Read_1_Before_filtering.svg", + "fastp-seq-content-n-plot_Read_2_Before_filtering.svg", + "fastp-seq-quality-plot_Merged_and_filtered.svg", + "fastp-seq-quality-plot_Read_1_Before_filtering.svg", + "fastp-seq-quality-plot_Read_2_Before_filtering.svg", + "fastp_filtered_reads_plot-cnt.svg", + "fastp_filtered_reads_plot-pct.svg", + "fiveprime_misinc_plot.svg", + "general_stats_table.svg", + "length-distribution-Forward.svg", + "length-distribution-Reverse.svg", + "mapdamage-fiveprime_misinc_plot.svg", + "mapdamage-length-distribution-Forward.svg", + "mapdamage-length-distribution-Reverse.svg", + "mapdamage-threeprime_misinc_plot.svg", + "preseq_complexity_plot_molecules.svg", + "qualimap_coverage_histogram.svg", + "qualimap_gc_content.svg", + "qualimap_genome_fraction.svg", + "samtools-flagstat-dp_Percentage_of_total.svg", + "samtools-flagstat-dp_Read_counts.svg", + "threeprime_misinc_plot.svg", + "multiqc_report.html", + "JK2782_JK2782_TGGCCGATCAACGA_L1_data.txt", + "JK2782_JK2782_TGGCCGATCAACGA_L1_report.html", + "JK2782_JK2782_TGGCCGATCAACGA_L1_summary.txt", + "JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz_fastqc_data.txt", + "JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz_fastqc_report.html", + "JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz_summary.txt", + "JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz_fastqc_data.txt", + "JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz_fastqc_report.html", + "JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz_summary.txt", + "JK2782_JK2782_TGGCCGATCAACGA_L1.fastp.html", + "JK2782_JK2782_TGGCCGATCAACGA_L1.fastp.json", + "JK2782_JK2782_TGGCCGATCAACGA_L1.fastp.log", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_filtered.flagstat", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_filtered.flagstat" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:34.825464" + }, + "read_filtering": { + 
"content": [ + [ + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_filtered.flagstat:md5,e020b9f057207812f1d7d4c2dc2775c7", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_filtered.flagstat:md5,995f2c36894ef7c9954c924f125a7fb1" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.581877" + }, + "genotyping_vcfs": { + "content": [ + [ + "JK2782_Mammoth_MT_Krause.vcf.gz:header_md5,f028f9b24ba56d5d10a08e033fd10dc4", + "JK2782_hs37d5_chr21-MT.vcf.gz:header_md5,0d967f517a2027bce43b08f0b8ca1e58" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.891115" + }, + "preprocessing": { + "content": [ + [ + "JK2782_JK2782_TGGCCGATCAACGA_L1_data.txt:md5,59a2b8a41a68e90cf1ce8490973eb55a", + "JK2782_JK2782_TGGCCGATCAACGA_L1_summary.txt:md5,32133b6c95c4307ec05287b2626fe962", + "JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz_fastqc_data.txt:md5,81e758dc2ae45da2522e723d61534ed8", + "JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz_summary.txt:md5,14ac35d492f82bea791844ebc85dac4b", + "JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz_fastqc_data.txt:md5,e4a62b746b9ba6889a1a31cde7831b3c", + "JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz_summary.txt:md5,9463c4fbc53f6c588cb1652d59566402", + "JK2782_JK2782_TGGCCGATCAACGA_L1.fastp.json:md5,33ac3cf9cb1351d4a72c910c84db9983" + ], + [ + "JK2782_JK2782_TGGCCGATCAACGA_L1_report.html", + "JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz_fastqc_report.html", + "JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz_fastqc_report.html", + "JK2782_JK2782_TGGCCGATCAACGA_L1.fastp.html", + "JK2782_JK2782_TGGCCGATCAACGA_L1.fastp.log" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.4904" + }, + "metagenomics": { + "content": [ + [ + 
"JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_metagenomics_fastq_unmapped_other.krakenuniq.report.txt:md5,b1e7642b1411262ea533b2f7f6dbb01e", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_metagenomics_fastq_unmapped_other.krakenuniq.report.txt:md5,1eac7636038d877f3aeed669f1ce9fea" + ], + [ + "krakenuniq_taxpasta_table.tsv" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.964266" + }, + "genotyping": { + "content": [ + [ + "JK2782_Mammoth_MT_Krause.bcftools_stats.txt:md5,61ea79d61d3db9758e2ef15b5e895580", + "JK2782_hs37d5_chr21-MT.bcftools_stats.txt:md5,f8570bcd90546d6543ee4e8aa3363c28" + ], + [ + "JK2782_Mammoth_MT_Krause.vcf.gz.tbi", + "JK2782_hs37d5_chr21-MT.vcf.gz.tbi" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.632792" + }, + "final_bams": { + "content": [ + [ + "JK2782_Mammoth_MT_Krause.flagstat:md5,dc1a84b8181885ff72a221576d916326", + "JK2782_hs37d5_chr21-MT.flagstat:md5,995f2c36894ef7c9954c924f125a7fb1" + ], + [ + "JK2782_Mammoth_MT_Krause.bam", + "JK2782_Mammoth_MT_Krause.bam.bai", + "JK2782_hs37d5_chr21-MT.bam", + "JK2782_hs37d5_chr21-MT.bam.bai" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.339986" + }, + "deduplication": { + "content": [ + [ + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_dedupped.flagstat:md5,dc1a84b8181885ff72a221576d916326", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_dedupped.flagstat:md5,995f2c36894ef7c9954c924f125a7fb1" + ], + [ + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_dedupped.bam", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_dedupped.bam.bai", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_dedupped.bam", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_dedupped.bam.bai" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.251664" + }, + "multiqc": { + "content": [ + [ + 
"bcftools-stats-subtypes.txt", + "bcftools_stats_indel-lengths.txt", + "bcftools_stats_variant_depths.txt", + "bcftools_stats_vqc_Count_Indels.txt", + "bcftools_stats_vqc_Count_SNP.txt", + "bcftools_stats_vqc_Count_Transitions.txt", + "bcftools_stats_vqc_Count_Transversions.txt", + "fastp-insert-size-plot.txt", + "fastp-seq-content-gc-plot_Merged_and_filtered.txt", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt", + "fastp-seq-content-n-plot_Merged_and_filtered.txt", + "fastp-seq-content-n-plot_Read_1_Before_filtering.txt", + "fastp-seq-content-n-plot_Read_2_Before_filtering.txt", + "fastp-seq-quality-plot_Merged_and_filtered.txt", + "fastp-seq-quality-plot_Read_1_Before_filtering.txt", + "fastp-seq-quality-plot_Read_2_Before_filtering.txt", + "fastp_filtered_reads_plot.txt", + "fiveprime_misinc_plot.txt", + "length-distribution-Forward.txt", + "length-distribution-Reverse.txt", + "mapdamage-fiveprime_misinc_plot.txt", + "mapdamage-length-distribution-Forward.txt", + "mapdamage-length-distribution-Reverse.txt", + "mapdamage-threeprime_misinc_plot.txt", + "multiqc.log", + "multiqc_bcftools_stats.txt", + "multiqc_citations.txt", + "multiqc_damageprofiler_metrics.txt", + "multiqc_data.json", + "multiqc_fastp.txt", + "multiqc_general_stats.txt", + "multiqc_qualimap_bamqc_genome_results.txt", + "multiqc_samtools_flagstat.txt", + "multiqc_software_versions.txt", + "multiqc_sources.txt", + "preseq.txt", + "preseq_complexity_plot_molecules.txt", + "qualimap_coverage_histogram.txt", + "qualimap_gc_content.txt", + "qualimap_genome_fraction.txt", + "samtools-flagstat-dp_Percentage_of_total.txt", + "samtools-flagstat-dp_Read_counts.txt", + "threeprime_misinc_plot.txt", + "bcftools-stats-subtypes-cnt.pdf", + "bcftools-stats-subtypes-pct.pdf", + "bcftools_stats_indel-lengths.pdf", + "bcftools_stats_variant_depths.pdf", + "bcftools_stats_vqc_Count_Indels.pdf", + "bcftools_stats_vqc_Count_SNP.pdf", + 
"bcftools_stats_vqc_Count_Transitions.pdf", + "bcftools_stats_vqc_Count_Transversions.pdf", + "fastp-insert-size-plot.pdf", + "fastp-seq-content-gc-plot_Merged_and_filtered.pdf", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.pdf", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.pdf", + "fastp-seq-content-n-plot_Merged_and_filtered.pdf", + "fastp-seq-content-n-plot_Read_1_Before_filtering.pdf", + "fastp-seq-content-n-plot_Read_2_Before_filtering.pdf", + "fastp-seq-quality-plot_Merged_and_filtered.pdf", + "fastp-seq-quality-plot_Read_1_Before_filtering.pdf", + "fastp-seq-quality-plot_Read_2_Before_filtering.pdf", + "fastp_filtered_reads_plot-cnt.pdf", + "fastp_filtered_reads_plot-pct.pdf", + "fiveprime_misinc_plot.pdf", + "general_stats_table.pdf", + "length-distribution-Forward.pdf", + "length-distribution-Reverse.pdf", + "mapdamage-fiveprime_misinc_plot.pdf", + "mapdamage-length-distribution-Forward.pdf", + "mapdamage-length-distribution-Reverse.pdf", + "mapdamage-threeprime_misinc_plot.pdf", + "preseq_complexity_plot_molecules.pdf", + "qualimap_coverage_histogram.pdf", + "qualimap_gc_content.pdf", + "qualimap_genome_fraction.pdf", + "samtools-flagstat-dp_Percentage_of_total.pdf", + "samtools-flagstat-dp_Read_counts.pdf", + "threeprime_misinc_plot.pdf", + "bcftools-stats-subtypes-cnt.png", + "bcftools-stats-subtypes-pct.png", + "bcftools_stats_indel-lengths.png", + "bcftools_stats_variant_depths.png", + "bcftools_stats_vqc_Count_Indels.png", + "bcftools_stats_vqc_Count_SNP.png", + "bcftools_stats_vqc_Count_Transitions.png", + "bcftools_stats_vqc_Count_Transversions.png", + "fastp-insert-size-plot.png", + "fastp-seq-content-gc-plot_Merged_and_filtered.png", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.png", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.png", + "fastp-seq-content-n-plot_Merged_and_filtered.png", + "fastp-seq-content-n-plot_Read_1_Before_filtering.png", + "fastp-seq-content-n-plot_Read_2_Before_filtering.png", + 
"fastp-seq-quality-plot_Merged_and_filtered.png", + "fastp-seq-quality-plot_Read_1_Before_filtering.png", + "fastp-seq-quality-plot_Read_2_Before_filtering.png", + "fastp_filtered_reads_plot-cnt.png", + "fastp_filtered_reads_plot-pct.png", + "fiveprime_misinc_plot.png", + "general_stats_table.png", + "length-distribution-Forward.png", + "length-distribution-Reverse.png", + "mapdamage-fiveprime_misinc_plot.png", + "mapdamage-length-distribution-Forward.png", + "mapdamage-length-distribution-Reverse.png", + "mapdamage-threeprime_misinc_plot.png", + "preseq_complexity_plot_molecules.png", + "qualimap_coverage_histogram.png", + "qualimap_gc_content.png", + "qualimap_genome_fraction.png", + "samtools-flagstat-dp_Percentage_of_total.png", + "samtools-flagstat-dp_Read_counts.png", + "threeprime_misinc_plot.png", + "bcftools-stats-subtypes-cnt.svg", + "bcftools-stats-subtypes-pct.svg", + "bcftools_stats_indel-lengths.svg", + "bcftools_stats_variant_depths.svg", + "bcftools_stats_vqc_Count_Indels.svg", + "bcftools_stats_vqc_Count_SNP.svg", + "bcftools_stats_vqc_Count_Transitions.svg", + "bcftools_stats_vqc_Count_Transversions.svg", + "fastp-insert-size-plot.svg", + "fastp-seq-content-gc-plot_Merged_and_filtered.svg", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.svg", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.svg", + "fastp-seq-content-n-plot_Merged_and_filtered.svg", + "fastp-seq-content-n-plot_Read_1_Before_filtering.svg", + "fastp-seq-content-n-plot_Read_2_Before_filtering.svg", + "fastp-seq-quality-plot_Merged_and_filtered.svg", + "fastp-seq-quality-plot_Read_1_Before_filtering.svg", + "fastp-seq-quality-plot_Read_2_Before_filtering.svg", + "fastp_filtered_reads_plot-cnt.svg", + "fastp_filtered_reads_plot-pct.svg", + "fiveprime_misinc_plot.svg", + "general_stats_table.svg", + "length-distribution-Forward.svg", + "length-distribution-Reverse.svg", + "mapdamage-fiveprime_misinc_plot.svg", + "mapdamage-length-distribution-Forward.svg", + 
"mapdamage-length-distribution-Reverse.svg", + "mapdamage-threeprime_misinc_plot.svg", + "preseq_complexity_plot_molecules.svg", + "qualimap_coverage_histogram.svg", + "qualimap_gc_content.svg", + "qualimap_genome_fraction.svg", + "samtools-flagstat-dp_Percentage_of_total.svg", + "samtools-flagstat-dp_Read_counts.svg", + "threeprime_misinc_plot.svg", + "multiqc_report.html" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:36.039675" + }, + "mapping_output": { + "content": [ + [ + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_sorted.flagstat:md5,1b7e5d27190dc712f107f4e274d43378", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_sorted.flagstat:md5,f5b65baf228adddd68359f5dad5162a6" + ], + [ + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_sorted.bam", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_sorted.bam.bai", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_sorted.bam", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_sorted.bam.bai" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.419002" + }, + "authentication": { + "content": [ + [ + "JK2782_Mammoth_MT_Krause_depth.bed:md5,fb8622fe4f5c61d492dccaf4aada12f8", + "3pGtoA_freq.txt:md5,25a4caf935e2f9e515b0ae3296eaaefa", + "5pCtoT_freq.txt:md5,96e6e1f7a092c2d74c8c1b6d92107b4f", + "dmgprof.json:md5,3217dc500f1e092744dcde51f0cfd136", + "editDistance.txt:md5,8153fa52a92ec3159c3cb9541b473ce4", + "lgdistribution.txt:md5,079157ae272bceb736ffeebea78ac895", + "3pGtoA_freq.txt:md5,1f420f7930f6966d868b386dd3289ff9", + "5pCtoT_freq.txt:md5,381bd0d3782f745b48d20a4024e88d0d", + "dmgprof.json:md5,9220a6e588b97dc37d643b4c5b11361d", + "editDistance.txt:md5,97f16676725302ebe8ceeede42a8d7fd", + "lgdistribution.txt:md5,173232f3c31bc70869f0069b9694e6e8", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_percent_on_target_mqc.json:md5,a0ba9e85a612bc78341a40d7c37913d7", + 
"JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_percent_on_target_mqc.json:md5,cc397422fe920aae9a50c6f176320fd3", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause.c_curve.txt:md5,07edc21d807f0ba05e1ebc25dbf1a6f5", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause.command.log:md5,d41d8cd98f00b204e9800998ecf8427e", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT.c_curve.txt:md5,cf4743abdd355595d6ec1fb3f38e66e5", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT.command.log:md5,d41d8cd98f00b204e9800998ecf8427e", + "agogo.css:md5,bd757b1a7ce6fdc0288ba148680f4583", + "ajax-loader.gif:md5,ae6667053ad118020b8e68ccf307b519", + "basic.css:md5,25b2823342c0604924a2870eeb4e7e94", + "doctools.js:md5,5ff571aa60e63f69c1890283e240ff8d", + "jquery.js:md5,10092eee563dec2dca82b77d2cf5a1ae", + "pygments.css:md5,d625a0adb949f181bd0d3f1432b0fa7f", + "report.css:md5,7a5f09eaf7c176f966f4e8854168b812", + "searchtools.js:md5,d550841adeedc8ed47c40ee607620937", + "underscore.js:md5,db5ba047a66617d4cd3e8c5099cc51db", + "websupport.js:md5,9e61e1e8a7433c56bd7e5a615affcf85", + "coverage_across_reference.txt:md5,b001e266a182e478d28e431bdaed32db", + "coverage_histogram.txt:md5,2ce0583063044147b0d9d96d6a0635c3", + "duplication_rate_histogram.txt:md5,e6c9e9b9fb0bc1afa5f86a88d50edb3b", + "genome_fraction_coverage.txt:md5,a1ced5dc2e0f55b9b5147822c31994bb", + "homopolymer_indels.txt:md5,2cc3cb5d9105a4d5feca2b3dbe32494e", + "mapped_reads_clipping_profile.txt:md5,402f69e636aa0fca915459c0c1a00a9f", + "mapping_quality_across_reference.txt:md5,89eba94fe07faa8ddcb52f264e3dc5ab", + "mapping_quality_histogram.txt:md5,d7fb55f12bf19b4ee41facb02262f590", + "agogo.css:md5,bd757b1a7ce6fdc0288ba148680f4583", + "ajax-loader.gif:md5,ae6667053ad118020b8e68ccf307b519", + "basic.css:md5,25b2823342c0604924a2870eeb4e7e94", + "doctools.js:md5,5ff571aa60e63f69c1890283e240ff8d", + "jquery.js:md5,10092eee563dec2dca82b77d2cf5a1ae", + "pygments.css:md5,d625a0adb949f181bd0d3f1432b0fa7f", + 
"report.css:md5,7a5f09eaf7c176f966f4e8854168b812", + "searchtools.js:md5,d550841adeedc8ed47c40ee607620937", + "underscore.js:md5,db5ba047a66617d4cd3e8c5099cc51db", + "websupport.js:md5,9e61e1e8a7433c56bd7e5a615affcf85" + ], + [ + "3p_freq_misincorporations.txt", + "5p_freq_misincorporations.txt", + "DNA_comp_genome.txt", + "DNA_composition_sample.txt", + "DamagePlot.pdf", + "DamagePlot_five_prime.svg", + "DamagePlot_three_prime.svg", + "DamageProfiler.log", + "Length_plot.pdf", + "Length_plot_combined_data.svg", + "Length_plot_forward_reverse_separated.svg", + "edit_distance.pdf", + "edit_distance.svg", + "misincorporation.txt", + "3p_freq_misincorporations.txt", + "5p_freq_misincorporations.txt", + "DNA_comp_genome.txt", + "DNA_composition_sample.txt", + "DamagePlot.pdf", + "DamagePlot_five_prime.svg", + "DamagePlot_three_prime.svg", + "DamageProfiler.log", + "Length_plot.pdf", + "Length_plot_combined_data.svg", + "Length_plot_forward_reverse_separated.svg", + "edit_distance.pdf", + "edit_distance.svg", + "misincorporation.txt", + "bgfooter.png", + "bgtop.png", + "comment-bright.png", + "comment-close.png", + "comment.png", + "down-pressed.png", + "down.png", + "file.png", + "minus.png", + "plus.png", + "qualimap_logo_small.png", + "up-pressed.png", + "up.png", + "genome_results.txt", + "genome_coverage_0to50_histogram.png", + "genome_coverage_across_reference.png", + "genome_coverage_histogram.png", + "genome_coverage_quotes.png", + "genome_gc_content_per_window.png", + "genome_homopolymer_indels.png", + "genome_mapping_quality_across_reference.png", + "genome_mapping_quality_histogram.png", + "genome_reads_clipping_profile.png", + "genome_reads_content_per_read_position.png", + "genome_uniq_read_starts_histogram.png", + "qualimapReport.html", + "mapped_reads_gc-content_distribution.txt", + "mapped_reads_nucleotide_content.txt", + "bgfooter.png", + "bgtop.png", + "comment-bright.png", + "comment-close.png", + "comment.png", + "down-pressed.png", + "down.png", + 
"file.png", + "minus.png", + "plus.png", + "qualimap_logo_small.png", + "up-pressed.png", + "up.png", + "genome_results.txt", + "qualimapReport.html" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:34.918926" + } +} \ No newline at end of file From a739d7fcfc29d7370592c4bd3e88ec64e755631e Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 18 Jul 2025 11:52:20 +0200 Subject: [PATCH 2/8] No stable content for metagenomics. --- tests/test_microbial.nf.test | 16 ++++++++-------- tests/test_microbial.nf.test.snap | 8 +++----- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/tests/test_microbial.nf.test b/tests/test_microbial.nf.test index a9625c70f..b68434c1a 100644 --- a/tests/test_microbial.nf.test +++ b/tests/test_microbial.nf.test @@ -77,24 +77,24 @@ nextflow_pipeline { def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) - // // Preprocessing - // // NOTE: FastQC html appears stable, but I worry it might just include a day timestamp instead of a full timestamp. To keep the expression simpler I removed both from checksum testing. + // Preprocessing + // NOTE: FastQC html appears stable, but I worry it might just include a day timestamp instead of a full timestamp. To keep the expression simpler I removed both from checksum testing. 
def stable_content_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: ['**/*.{zip,log,html}'], ignoreFile: null , include: ['**/*'] ) def stable_name_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{zip,log,html}'] ) - // // Read filtering + // Read filtering def stable_content_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) def stable_name_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) - // // Genotyping + // Genotyping def stable_content_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: ['**/*.{tbi,vcf.gz}'] , ignoreFile: null , include: ['**/*'] ) def stable_name_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.tbi'] ) // We need to collect the vcfs separately to run more specific md5sum checks on the header (contnts are unstable due to same reasons as BAMs, explained above). def genotyping_vcfs = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.vcf.gz'] ) - // // Metagenomics - def stable_content_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: ['**/*table.tsv'] , ignoreFile: null , include: ['**/*'] ) - def stable_name_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*table.tsv'] ) + // Metagenomics + // This section has no stable content, because the kraken report files contain a timestamp, and the taxpasta results change ever so slightly because of inconsistent BAM files. 
+ def stable_name_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*'] ) // MultiQC def stable_name_multiqc = getAllFilesFromDir("$outputDir/multiqc" , includeDir: false , ignore: null , ignoreFile: null , include: ['*', '**/*'] ) @@ -137,7 +137,7 @@ nextflow_pipeline { file.getName() + ":header_md5," + header_md5 } ).match("genotyping_vcfs")}, - { assert snapshot( stable_content_metagenomics , stable_name_metagenomics*.name ).match("metagenomics") }, + { assert snapshot( stable_name_metagenomics*.name ).match("metagenomics") }, { assert snapshot( stable_name_multiqc*.name ).match("multiqc") }, // Versions diff --git a/tests/test_microbial.nf.test.snap b/tests/test_microbial.nf.test.snap index f1a9a88f5..db35d813c 100644 --- a/tests/test_microbial.nf.test.snap +++ b/tests/test_microbial.nf.test.snap @@ -374,10 +374,8 @@ "metagenomics": { "content": [ [ - "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_metagenomics_fastq_unmapped_other.krakenuniq.report.txt:md5,b1e7642b1411262ea533b2f7f6dbb01e", - "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_metagenomics_fastq_unmapped_other.krakenuniq.report.txt:md5,1eac7636038d877f3aeed669f1ce9fea" - ], - [ + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_metagenomics_fastq_unmapped_other.krakenuniq.report.txt", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_metagenomics_fastq_unmapped_other.krakenuniq.report.txt", "krakenuniq_taxpasta_table.tsv" ] ], @@ -385,7 +383,7 @@ "nf-test": "0.9.2", "nextflow": "24.10.6" }, - "timestamp": "2025-06-20T11:36:35.964266" + "timestamp": "2025-07-18T11:45:53.966751" }, "genotyping": { "content": [ From f3d8ac86e450f0e861e04cde5678260b8b65097c Mon Sep 17 00:00:00 2001 From: "Thiseas C. 
Lamnidis" Date: Fri, 12 Sep 2025 10:59:15 +0200 Subject: [PATCH 3/8] Update test_microbial test and comments --- tests/test_microbial.nf.test | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_microbial.nf.test b/tests/test_microbial.nf.test index b68434c1a..67dfa5e06 100644 --- a/tests/test_microbial.nf.test +++ b/tests/test_microbial.nf.test @@ -5,8 +5,9 @@ nextflow_pipeline { tag "pipeline" tag "nfcore_eager" tag "test_microbial" + profile "test_microbial" - test("test_microbial_profile") { + test("Test `test_microbial` profile:") { when { params { @@ -28,8 +29,8 @@ nextflow_pipeline { // If a directory is fully stable, you can drop `stable_name_*` // If a directory contains no BAMs, you can drop `bams_*` - // Generate with: nf-test test --tag test_microbial --profile docker,test_microbial --update-snapshot - // Test with: nf-test test --tag test_microbial --profile docker,test_microbial + // Generate with: nf-test test --profile +docker --tag test_microbial --update-snapshot + // Test with: nf-test test --profile +docker --tag test_microbial // NOTE: BAMs are always only stable in name, because: // a) sharding breaks header since the shard that was first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112) // b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order) From d7cda1a9eb1fe93f14e89700f3864b96ef05e2c3 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 12 Sep 2025 11:29:13 +0200 Subject: [PATCH 4/8] exclude preseq command log from checksum --- tests/test_microbial.nf.test | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_microbial.nf.test b/tests/test_microbial.nf.test index 67dfa5e06..717781b31 100644 --- a/tests/test_microbial.nf.test +++ b/tests/test_microbial.nf.test @@ -56,6 +56,7 @@ nextflow_pipeline { '**/DNA_composition_sample.txt', '**/misincorporation.txt', '**/genome_results.txt', + 
'**/*command.log', ] // Check that no files are missing/added From 84033a0a53000c0004f6b478678d0a1ca7285b5f Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 12 Sep 2025 11:36:12 +0200 Subject: [PATCH 5/8] remove leftover todo --- tests/test_microbial.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_microbial.nf.test b/tests/test_microbial.nf.test index 717781b31..9fd4d1041 100644 --- a/tests/test_microbial.nf.test +++ b/tests/test_microbial.nf.test @@ -67,7 +67,7 @@ nextflow_pipeline { def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] ) def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth) - // Deduplication - TODO -> snapshot both lists are empty!? + // Deduplication def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) From 1c80d8ef7da5018a18c87297982dec9e65f8ee26 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 12 Sep 2025 11:36:52 +0200 Subject: [PATCH 6/8] Remove leftover Todo --- tests/default.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/default.nf.test b/tests/default.nf.test index 884747269..6d29b2dad 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -67,7 +67,7 @@ nextflow_pipeline { def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] ) def stable_name_authentication = 
getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth) - // Deduplication - TODO -> snapshot both lists are empty!? + // Deduplication def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) From 8d0383d74c5f89bf67962b676d44575dc7af7561 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Tue, 16 Sep 2025 12:38:30 +0200 Subject: [PATCH 7/8] fix error in modules.config --- conf/modules.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 92847ce9f..069ae1256 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1698,9 +1698,9 @@ process { [ ref_meta.ploidy ? "-p ${ref_meta.ploidy}" : '', "-C ${params.genotyping_freebayes_min_alternate_count}", - { params.genotyping_freebayes_skip_coverage == 0 ? "" : "-g ${params.genotyping_freebayes_skip_coverage}" }, - ] - }.join(' ').trim() + params.genotyping_freebayes_skip_coverage == 0 ? 
"" : "-g ${params.genotyping_freebayes_skip_coverage}", + ].join(' ').trim() + } } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ From 68d6732a68d583144ad10bf4908d5a71229098a0 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Tue, 16 Sep 2025 13:23:15 +0200 Subject: [PATCH 8/8] Update snapshot --- tests/test_microbial.nf.test.snap | 74 ++++++++++++++++--------------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/tests/test_microbial.nf.test.snap b/tests/test_microbial.nf.test.snap index db35d813c..d89eafc75 100644 --- a/tests/test_microbial.nf.test.snap +++ b/tests/test_microbial.nf.test.snap @@ -145,6 +145,7 @@ "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_metagenomics_fastq_unmapped_other.krakenuniq.report.txt", "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_metagenomics_fastq_unmapped_other.krakenuniq.report.txt", "krakenuniq_taxpasta_table.tsv", + "BETA-multiqc.parquet", "bcftools-stats-subtypes.txt", "bcftools_stats_indel-lengths.txt", "bcftools_stats_variant_depths.txt", @@ -186,12 +187,13 @@ "qualimap_coverage_histogram.txt", "qualimap_gc_content.txt", "qualimap_genome_fraction.txt", - "samtools-flagstat-dp_Percentage_of_total.txt", - "samtools-flagstat-dp_Read_counts.txt", + "samtools-flagstat-pct-table.txt", + "samtools-flagstat-table.txt", "threeprime_misinc_plot.txt", "bcftools-stats-subtypes-cnt.pdf", "bcftools-stats-subtypes-pct.pdf", - "bcftools_stats_indel-lengths.pdf", + "bcftools_stats_indel-lengths-cnt.pdf", + "bcftools_stats_indel-lengths-log.pdf", "bcftools_stats_variant_depths.pdf", "bcftools_stats_vqc_Count_Indels.pdf", "bcftools_stats_vqc_Count_SNP.pdf", @@ -210,7 +212,6 @@ "fastp_filtered_reads_plot-cnt.pdf", "fastp_filtered_reads_plot-pct.pdf", "fiveprime_misinc_plot.pdf", - "general_stats_table.pdf", "length-distribution-Forward.pdf", "length-distribution-Reverse.pdf", "mapdamage-fiveprime_misinc_plot.pdf", @@ -221,12 +222,13 @@ "qualimap_coverage_histogram.pdf", 
"qualimap_gc_content.pdf", "qualimap_genome_fraction.pdf", - "samtools-flagstat-dp_Percentage_of_total.pdf", - "samtools-flagstat-dp_Read_counts.pdf", + "samtools-flagstat-pct-table.pdf", + "samtools-flagstat-table.pdf", "threeprime_misinc_plot.pdf", "bcftools-stats-subtypes-cnt.png", "bcftools-stats-subtypes-pct.png", - "bcftools_stats_indel-lengths.png", + "bcftools_stats_indel-lengths-cnt.png", + "bcftools_stats_indel-lengths-log.png", "bcftools_stats_variant_depths.png", "bcftools_stats_vqc_Count_Indels.png", "bcftools_stats_vqc_Count_SNP.png", @@ -245,7 +247,6 @@ "fastp_filtered_reads_plot-cnt.png", "fastp_filtered_reads_plot-pct.png", "fiveprime_misinc_plot.png", - "general_stats_table.png", "length-distribution-Forward.png", "length-distribution-Reverse.png", "mapdamage-fiveprime_misinc_plot.png", @@ -256,12 +257,13 @@ "qualimap_coverage_histogram.png", "qualimap_gc_content.png", "qualimap_genome_fraction.png", - "samtools-flagstat-dp_Percentage_of_total.png", - "samtools-flagstat-dp_Read_counts.png", + "samtools-flagstat-pct-table.png", + "samtools-flagstat-table.png", "threeprime_misinc_plot.png", "bcftools-stats-subtypes-cnt.svg", "bcftools-stats-subtypes-pct.svg", - "bcftools_stats_indel-lengths.svg", + "bcftools_stats_indel-lengths-cnt.svg", + "bcftools_stats_indel-lengths-log.svg", "bcftools_stats_variant_depths.svg", "bcftools_stats_vqc_Count_Indels.svg", "bcftools_stats_vqc_Count_SNP.svg", @@ -280,7 +282,6 @@ "fastp_filtered_reads_plot-cnt.svg", "fastp_filtered_reads_plot-pct.svg", "fiveprime_misinc_plot.svg", - "general_stats_table.svg", "length-distribution-Forward.svg", "length-distribution-Reverse.svg", "mapdamage-fiveprime_misinc_plot.svg", @@ -291,8 +292,8 @@ "qualimap_coverage_histogram.svg", "qualimap_gc_content.svg", "qualimap_genome_fraction.svg", - "samtools-flagstat-dp_Percentage_of_total.svg", - "samtools-flagstat-dp_Read_counts.svg", + "samtools-flagstat-pct-table.svg", + "samtools-flagstat-table.svg", "threeprime_misinc_plot.svg", 
"multiqc_report.html", "JK2782_JK2782_TGGCCGATCAACGA_L1_data.txt", @@ -313,9 +314,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.6" + "nextflow": "25.04.7" }, - "timestamp": "2025-06-20T11:36:34.825464" + "timestamp": "2025-09-16T13:00:10.310232" }, "read_filtering": { "content": [ @@ -443,6 +444,7 @@ "multiqc": { "content": [ [ + "BETA-multiqc.parquet", "bcftools-stats-subtypes.txt", "bcftools_stats_indel-lengths.txt", "bcftools_stats_variant_depths.txt", @@ -484,12 +486,13 @@ "qualimap_coverage_histogram.txt", "qualimap_gc_content.txt", "qualimap_genome_fraction.txt", - "samtools-flagstat-dp_Percentage_of_total.txt", - "samtools-flagstat-dp_Read_counts.txt", + "samtools-flagstat-pct-table.txt", + "samtools-flagstat-table.txt", "threeprime_misinc_plot.txt", "bcftools-stats-subtypes-cnt.pdf", "bcftools-stats-subtypes-pct.pdf", - "bcftools_stats_indel-lengths.pdf", + "bcftools_stats_indel-lengths-cnt.pdf", + "bcftools_stats_indel-lengths-log.pdf", "bcftools_stats_variant_depths.pdf", "bcftools_stats_vqc_Count_Indels.pdf", "bcftools_stats_vqc_Count_SNP.pdf", @@ -508,7 +511,6 @@ "fastp_filtered_reads_plot-cnt.pdf", "fastp_filtered_reads_plot-pct.pdf", "fiveprime_misinc_plot.pdf", - "general_stats_table.pdf", "length-distribution-Forward.pdf", "length-distribution-Reverse.pdf", "mapdamage-fiveprime_misinc_plot.pdf", @@ -519,12 +521,13 @@ "qualimap_coverage_histogram.pdf", "qualimap_gc_content.pdf", "qualimap_genome_fraction.pdf", - "samtools-flagstat-dp_Percentage_of_total.pdf", - "samtools-flagstat-dp_Read_counts.pdf", + "samtools-flagstat-pct-table.pdf", + "samtools-flagstat-table.pdf", "threeprime_misinc_plot.pdf", "bcftools-stats-subtypes-cnt.png", "bcftools-stats-subtypes-pct.png", - "bcftools_stats_indel-lengths.png", + "bcftools_stats_indel-lengths-cnt.png", + "bcftools_stats_indel-lengths-log.png", "bcftools_stats_variant_depths.png", "bcftools_stats_vqc_Count_Indels.png", "bcftools_stats_vqc_Count_SNP.png", @@ -543,7 +546,6 @@ 
"fastp_filtered_reads_plot-cnt.png", "fastp_filtered_reads_plot-pct.png", "fiveprime_misinc_plot.png", - "general_stats_table.png", "length-distribution-Forward.png", "length-distribution-Reverse.png", "mapdamage-fiveprime_misinc_plot.png", @@ -554,12 +556,13 @@ "qualimap_coverage_histogram.png", "qualimap_gc_content.png", "qualimap_genome_fraction.png", - "samtools-flagstat-dp_Percentage_of_total.png", - "samtools-flagstat-dp_Read_counts.png", + "samtools-flagstat-pct-table.png", + "samtools-flagstat-table.png", "threeprime_misinc_plot.png", "bcftools-stats-subtypes-cnt.svg", "bcftools-stats-subtypes-pct.svg", - "bcftools_stats_indel-lengths.svg", + "bcftools_stats_indel-lengths-cnt.svg", + "bcftools_stats_indel-lengths-log.svg", "bcftools_stats_variant_depths.svg", "bcftools_stats_vqc_Count_Indels.svg", "bcftools_stats_vqc_Count_SNP.svg", @@ -578,7 +581,6 @@ "fastp_filtered_reads_plot-cnt.svg", "fastp_filtered_reads_plot-pct.svg", "fiveprime_misinc_plot.svg", - "general_stats_table.svg", "length-distribution-Forward.svg", "length-distribution-Reverse.svg", "mapdamage-fiveprime_misinc_plot.svg", @@ -589,17 +591,17 @@ "qualimap_coverage_histogram.svg", "qualimap_gc_content.svg", "qualimap_genome_fraction.svg", - "samtools-flagstat-dp_Percentage_of_total.svg", - "samtools-flagstat-dp_Read_counts.svg", + "samtools-flagstat-pct-table.svg", + "samtools-flagstat-table.svg", "threeprime_misinc_plot.svg", "multiqc_report.html" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.6" + "nextflow": "25.04.7" }, - "timestamp": "2025-06-20T11:36:36.039675" + "timestamp": "2025-09-16T13:00:10.794583" }, "mapping_output": { "content": [ @@ -637,9 +639,7 @@ "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_percent_on_target_mqc.json:md5,a0ba9e85a612bc78341a40d7c37913d7", "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_percent_on_target_mqc.json:md5,cc397422fe920aae9a50c6f176320fd3", "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause.c_curve.txt:md5,07edc21d807f0ba05e1ebc25dbf1a6f5", 
- "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause.command.log:md5,d41d8cd98f00b204e9800998ecf8427e", "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT.c_curve.txt:md5,cf4743abdd355595d6ec1fb3f38e66e5", - "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT.command.log:md5,d41d8cd98f00b204e9800998ecf8427e", "agogo.css:md5,bd757b1a7ce6fdc0288ba148680f4583", "ajax-loader.gif:md5,ae6667053ad118020b8e68ccf307b519", "basic.css:md5,25b2823342c0604924a2870eeb4e7e94", @@ -698,6 +698,8 @@ "edit_distance.pdf", "edit_distance.svg", "misincorporation.txt", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause.command.log", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT.command.log", "bgfooter.png", "bgtop.png", "comment-bright.png", @@ -745,8 +747,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.6" + "nextflow": "25.04.7" }, - "timestamp": "2025-06-20T11:36:34.918926" + "timestamp": "2025-09-16T13:00:10.465714" } } \ No newline at end of file