From 620201be2d1dcf6fb8d5eaac72ca16bdf96df5eb Mon Sep 17 00:00:00 2001 From: Lydia Buntrock Date: Thu, 16 Sep 2021 18:24:23 +0200 Subject: [PATCH] [MISC] Apply 2. Review Signed-off-by: Lydia Buntrock --- include/iGenVar.hpp | 4 +++- .../hierarchical_clustering_method.hpp | 5 +---- .../analyze_cigar_method.hpp | 4 +--- .../analyze_split_read_method.hpp | 4 +--- src/iGenVar.cpp | 4 ++-- .../hierarchical_clustering_method.cpp | 7 +++---- .../analyze_cigar_method.cpp | 8 +++---- .../analyze_split_read_method.cpp | 9 ++++---- src/variant_detection/variant_detection.cpp | 3 +-- test/api/clustering_test.cpp | 21 ++++++++++--------- test/api/detection_test.cpp | 20 +++++++----------- test/api/input_file_test.cpp | 6 +----- 12 files changed, 40 insertions(+), 55 deletions(-) diff --git a/include/iGenVar.hpp b/include/iGenVar.hpp index fe4ebde5..20eb0b9a 100644 --- a/include/iGenVar.hpp +++ b/include/iGenVar.hpp @@ -4,6 +4,8 @@ #include "variant_detection/method_enums.hpp" // for enum detection_methods, clustering_methods and refinement_methods +inline bool gVerbose{false}; + struct cmd_arguments { // Input: @@ -16,7 +18,7 @@ struct cmd_arguments /* -b */ std::filesystem::path clusters_file_path{}; // Others: /* -h - help - not part of the args struct */ - /* -v */ bool verbose = false; + /* -v - verbose - global variable gVerbose */ /* -t */ int16_t threads = 1; // Methods: /* -d */ std::vector methods{cigar_string, split_read, read_pairs, read_depth}; // default: all diff --git a/include/modules/clustering/hierarchical_clustering_method.hpp b/include/modules/clustering/hierarchical_clustering_method.hpp index ddaa0149..af7754f0 100644 --- a/include/modules/clustering/hierarchical_clustering_method.hpp +++ b/include/modules/clustering/hierarchical_clustering_method.hpp @@ -39,11 +39,8 @@ int junction_distance(Junction const & lhs, Junction const & rhs); * * \param[in] junctions - a vector of junctions (needs to be sorted) * \param[in] clustering_cutoff - distance cutoff for clustering - * \param[in] verbose - verbose option * * \details For the algorithms we use the library hclust. * \see https://lionel.kr.hs-niederrhein.de/~dalitz/data/hclust/ (last access 01.06.2021). */ -std::vector hierarchical_clustering_method(std::vector const & junctions, - double clustering_cutoff, - bool const verbose); +std::vector hierarchical_clustering_method(std::vector const & junctions, double clustering_cutoff); diff --git a/include/modules/sv_detection_methods/analyze_cigar_method.hpp b/include/modules/sv_detection_methods/analyze_cigar_method.hpp index 31b038df..7ab08958 100644 --- a/include/modules/sv_detection_methods/analyze_cigar_method.hpp +++ b/include/modules/sv_detection_methods/analyze_cigar_method.hpp @@ -11,7 +11,6 @@ * \param[in] query_sequence - SEQ field of the SAM/BAM file * \param[in, out] junctions - vector for storing junctions * \param[in] min_length - minimum length of variants to detect (default 30 bp, expected to be non-negative) - * \param[in] verbose - verbose option * * \details This function steps through the CIGAR string and stores junctions with their position in reference and read. * We distinguish 4 cases of CIGAR operation characters: @@ -37,5 +36,4 @@ void analyze_cigar(std::string const & read_name, std::vector & cigar_string, seqan3::dna5_vector const & query_sequence, std::vector & junctions, - int32_t const min_length, - bool const verbose); + int32_t const min_length); diff --git a/include/modules/sv_detection_methods/analyze_split_read_method.hpp b/include/modules/sv_detection_methods/analyze_split_read_method.hpp index 89e36f5b..392b0f20 100644 --- a/include/modules/sv_detection_methods/analyze_split_read_method.hpp +++ b/include/modules/sv_detection_methods/analyze_split_read_method.hpp @@ -44,15 +44,13 @@ void retrieve_aligned_segments(std::string const & sa_string, std::vector const & aligned_segments, std::vector & junctions, seqan3::dna5_vector const & query_sequence, std::string const & read_name, int32_t const min_length, - int32_t const max_overlap, - bool const verbose); + int32_t const max_overlap); /*! \brief Parse the SA tag from the SAM/BAM alignment of a chimeric/split-aligned read. Build * [aligned_segments](\ref AlignedSegment), one for each alignment segment of the read. diff --git a/src/iGenVar.cpp b/src/iGenVar.cpp index e03341da..d2ef1897 100644 --- a/src/iGenVar.cpp +++ b/src/iGenVar.cpp @@ -49,7 +49,7 @@ void initialize_argument_parser(seqan3::argument_parser & parser, cmd_arguments parser.add_option(args.threads, 't', "threads", "Specify the number of decompression threads used for reading BAM files.", seqan3::option_spec::standard); - parser.add_flag(args.verbose, 'v', "verbose", + parser.add_flag(gVerbose, 'v', "verbose", "If you set this flag to true, we provide additional details about what iGenVar does. The detailed " "output is printed in the standard output."); @@ -155,7 +155,7 @@ void detect_variants_in_alignment_file(cmd_arguments const & args) clusters = simple_clustering_method(junctions); break; case 1: // hierarchical clustering - clusters = hierarchical_clustering_method(junctions, args.hierarchical_clustering_cutoff, args.verbose); + clusters = hierarchical_clustering_method(junctions, args.hierarchical_clustering_cutoff); break; case 2: // self-balancing_binary_tree, seqan3::debug_stream << "The self-balancing binary tree clustering method is not yet implemented\n"; diff --git a/src/modules/clustering/hierarchical_clustering_method.cpp b/src/modules/clustering/hierarchical_clustering_method.cpp index f8e60d4d..d25a1a55 100644 --- a/src/modules/clustering/hierarchical_clustering_method.cpp +++ b/src/modules/clustering/hierarchical_clustering_method.cpp @@ -1,3 +1,4 @@ +#include "iGenVar.hpp" // for global variable gVerbose #include "modules/clustering/hierarchical_clustering_method.hpp" #include // for infinity @@ -117,9 +118,7 @@ inline std::vector subsample_partition(std::vector const & p return subsample; } -std::vector hierarchical_clustering_method(std::vector const & junctions, - double clustering_cutoff, - bool const verbose) +std::vector hierarchical_clustering_method(std::vector const & junctions, double clustering_cutoff) { auto partitions = partition_junctions(junctions); std::vector clusters{}; @@ -136,7 +135,7 @@ std::vector hierarchical_clustering_method(std::vector const } if (partition_size > max_partition_size) { - if (verbose) + if (gVerbose) { seqan3::debug_stream << "A partition exceeds the maximum size (" << partition_size diff --git a/src/modules/sv_detection_methods/analyze_cigar_method.cpp b/src/modules/sv_detection_methods/analyze_cigar_method.cpp index b4c298c3..368e2384 100644 --- a/src/modules/sv_detection_methods/analyze_cigar_method.cpp +++ b/src/modules/sv_detection_methods/analyze_cigar_method.cpp @@ -3,6 +3,7 @@ #include #include +#include "iGenVar.hpp" // for global variable gVerbose #include "structures/breakend.hpp" // for class Breakend #include "structures/junction.hpp" // for class Junction @@ -15,8 +16,7 @@ void analyze_cigar(std::string const & read_name, std::vector & cigar_string, seqan3::dna5_vector const & query_sequence, std::vector & junctions, - int32_t const min_length, - bool const verbose) + int32_t const min_length) { // Step through CIGAR string and store current position in reference and read int32_t pos_ref = query_start_pos; @@ -44,7 +44,7 @@ void analyze_cigar(std::string const & read_name, inserted_bases, tandem_dup_count, read_name}; - if (verbose) + if (gVerbose) seqan3::debug_stream << "INS: " << new_junction << "\n"; junctions.push_back(std::move(new_junction)); } @@ -60,7 +60,7 @@ void analyze_cigar(std::string const & read_name, ""_dna5, tandem_dup_count, read_name}; - if (verbose) + if (gVerbose) seqan3::debug_stream << "DEL: " << new_junction << "\n"; junctions.push_back(std::move(new_junction)); } diff --git a/src/modules/sv_detection_methods/analyze_split_read_method.cpp b/src/modules/sv_detection_methods/analyze_split_read_method.cpp index 98eee16f..b80a7624 100644 --- a/src/modules/sv_detection_methods/analyze_split_read_method.cpp +++ b/src/modules/sv_detection_methods/analyze_split_read_method.cpp @@ -1,3 +1,4 @@ +#include "iGenVar.hpp" // for global variable gVerbose #include "modules/sv_detection_methods/analyze_split_read_method.hpp" #include @@ -57,8 +58,7 @@ void analyze_aligned_segments(std::vector const & aligned_segmen seqan3::dna5_vector const & query_sequence, std::string const & read_name, int32_t const min_length, - int32_t const max_overlap, - bool const verbose) + int32_t const max_overlap) { size_t tandem_dup_count = 0; for (size_t i = 1; i < aligned_segments.size(); i++) @@ -120,7 +120,7 @@ void analyze_aligned_segments(std::vector const & aligned_segmen next.get_query_start()); junctions.emplace_back(mate1, mate2, inserted_bases, tandem_dup_count, read_name); } - if (verbose) + if (gVerbose) seqan3::debug_stream << "BND: " << junctions.back() << "\n"; } } @@ -150,6 +150,5 @@ void analyze_sa_tag(std::string const & query_name, seq, query_name, args.min_var_length, - args.max_overlap, - args.verbose); + args.max_overlap); } diff --git a/src/variant_detection/variant_detection.cpp b/src/variant_detection/variant_detection.cpp index 63b011d4..5b72cbef 100644 --- a/src/variant_detection/variant_detection.cpp +++ b/src/variant_detection/variant_detection.cpp @@ -146,8 +146,7 @@ void detect_junctions_in_long_reads_sam_file(std::vector & junctions, cigar, seq, junctions, - args.min_var_length, - args.verbose); + args.min_var_length); break; case detection_methods::split_read: // Detect junctions from split read evidence (SA tag, if (!hasFlagSupplementary(flag)) // primary alignments only) diff --git a/test/api/clustering_test.cpp b/test/api/clustering_test.cpp index cc7ab0d3..19ca5ff3 100644 --- a/test/api/clustering_test.cpp +++ b/test/api/clustering_test.cpp @@ -1,13 +1,12 @@ #include +#include "iGenVar.hpp" // for global variable gVerbose #include "modules/clustering/simple_clustering_method.hpp" // for the simple clustering method #include "modules/clustering/hierarchical_clustering_method.hpp" // for the hierarchical clustering method #include "structures/cluster.hpp" // for class Cluster using seqan3::operator""_dna5; -bool verbose = true; - /* -------- clustering methods tests -------- */ std::string const chrom1 = "chr1"; @@ -204,7 +203,7 @@ TEST(hierarchical_clustering, partitioning) TEST(hierarchical_clustering, strict_clustering) { std::vector input_junctions = prepare_input_junctions(); - std::vector clusters = hierarchical_clustering_method(input_junctions, 0, verbose); + std::vector clusters = hierarchical_clustering_method(input_junctions, 0); // Each junction in separate cluster std::vector expected_clusters @@ -259,7 +258,7 @@ TEST(hierarchical_clustering, strict_clustering) TEST(hierarchical_clustering, clustering_10) { std::vector input_junctions = prepare_input_junctions(); - std::vector clusters = hierarchical_clustering_method(input_junctions, 10, verbose); + std::vector clusters = hierarchical_clustering_method(input_junctions, 10); // Distance matrix for junctions from reads 1-3 and 6-8 // 1 2 3 @@ -322,7 +321,7 @@ TEST(hierarchical_clustering, clustering_10) TEST(hierarchical_clustering, clustering_15) { std::vector input_junctions = prepare_input_junctions(); - std::vector clusters = hierarchical_clustering_method(input_junctions, 15, verbose); + std::vector clusters = hierarchical_clustering_method(input_junctions, 15); // Distance matrix for junctions from reads 1-3 and 6-8 // 1 2 3 @@ -383,7 +382,7 @@ TEST(hierarchical_clustering, clustering_15) TEST(hierarchical_clustering, clustering_25) { std::vector input_junctions = prepare_input_junctions(); - std::vector clusters = hierarchical_clustering_method(input_junctions, 25, verbose); + std::vector clusters = hierarchical_clustering_method(input_junctions, 25); // Distance matrix for junctions from reads 1-3 and 6-8 // 1 2 3 @@ -450,6 +449,8 @@ TEST(hierarchical_clustering, clustering_25) TEST(hierarchical_clustering, subsampling) { + gVerbose = true; + std::vector input_junctions; for (int32_t i = 0; i < 300; ++i) { @@ -462,7 +463,7 @@ TEST(hierarchical_clustering, subsampling) std::sort(input_junctions.begin(), input_junctions.end()); testing::internal::CaptureStderr(); - std::vector clusters = hierarchical_clustering_method(input_junctions, 0, verbose); + std::vector clusters = hierarchical_clustering_method(input_junctions, 0); size_t num_junctions = 0; for (Cluster const & cluster : clusters) @@ -527,10 +528,10 @@ TEST(hierarchical_clustering, cluster_tandem_dup_count) << " unequal"; } - resulting_clusters = hierarchical_clustering_method(input_junctions, 0, verbose); // clustering_cutoff = 0 + resulting_clusters = hierarchical_clustering_method(input_junctions, 0); // clustering_cutoff = 0 ASSERT_EQ(5, resulting_clusters.size()); - resulting_clusters = hierarchical_clustering_method(input_junctions, 1, verbose); // clustering_cutoff = 1 + resulting_clusters = hierarchical_clustering_method(input_junctions, 1); // clustering_cutoff = 1 ASSERT_EQ(expected_clusters.size(), resulting_clusters.size()); for (size_t cluster_index = 0; cluster_index < expected_clusters.size(); ++cluster_index) @@ -540,6 +541,6 @@ TEST(hierarchical_clustering, cluster_tandem_dup_count) << " unequal"; } - resulting_clusters = hierarchical_clustering_method(input_junctions, 10, verbose); // clustering_cutoff = 10 (default value) + resulting_clusters = hierarchical_clustering_method(input_junctions, 10); // clustering_cutoff = 10 (default value) ASSERT_EQ(1, resulting_clusters.size()); } diff --git a/test/api/detection_test.cpp b/test/api/detection_test.cpp index 84d484f8..792fa9e7 100644 --- a/test/api/detection_test.cpp +++ b/test/api/detection_test.cpp @@ -30,7 +30,7 @@ TEST(junction_detection, cigar_string_simple_del) { std::vector junctions_res{}; int32_t const min_var_length = 10; - analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose); + analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length); EXPECT_EQ(junctions_res.size(), 0); } @@ -39,7 +39,7 @@ TEST(junction_detection, cigar_string_simple_del) { std::vector junctions_res{}; int32_t const min_var_length = 5; - analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose); + analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length); Breakend new_breakend_1 {chromosome, 15, strand::forward}; Breakend new_breakend_2 {chromosome, 22, strand::forward}; @@ -73,7 +73,7 @@ TEST(junction_detection, cigar_string_del_padding) std::vector junctions_res{}; int32_t const min_var_length = 5; - analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose); + analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length); Breakend new_breakend_1 {chromosome, 15, strand::forward}; Breakend new_breakend_2 {chromosome, 22, strand::forward}; @@ -105,7 +105,7 @@ TEST(junction_detection, cigar_string_simple_ins) std::vector junctions_res{}; int32_t const min_var_length = 5; - analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose); + analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length); Breakend new_breakend_1 {chromosome, 9, strand::forward}; Breakend new_breakend_2 {chromosome, 10, strand::forward}; @@ -137,7 +137,7 @@ TEST(junction_detection, cigar_string_ins_hardclip) std::vector junctions_res{}; int32_t const min_var_length = 5; - analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose); + analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length); Breakend new_breakend_1 {chromosome, 9, strand::forward}; Breakend new_breakend_2 {chromosome, 10, strand::forward}; @@ -272,8 +272,7 @@ TEST(junction_detection, analyze_aligned_segments) query_sequence, read_name, 10, - 0, - verbose); + 0); Breakend new_breakend_1 {"chr1", 105, strand::forward}; Breakend new_breakend_2 {"chr2", 100, strand::forward}; @@ -331,8 +330,7 @@ TEST(junction_detection, analyze_aligned_segments) query_sequence, read_name, 20, - 0, - verbose); + 0); Breakend new_breakend_1 {"chr1", 105, strand::forward}; Breakend new_breakend_2 {"chr2", 100, strand::forward}; @@ -383,8 +381,7 @@ TEST(junction_detection, overlapping_segments) query_sequence, read_name, 10, - 10, - verbose)); + 10)); // Deletion from two overlapping alignment segments (overlap of 5bp) Breakend new_breakend_1 {"chr1", 119, strand::forward}; @@ -431,7 +428,6 @@ TEST(junction_detection, analyze_sa_tag) std::filesystem::path{}, // junctions_file_path std::filesystem::path{}, // clusters_file_path 1, // threads - false, // verbose std::vector{cigar_string, split_read, read_pairs, read_depth}, simple_clustering, sVirl_refinement_method, diff --git a/test/api/input_file_test.cpp b/test/api/input_file_test.cpp index c63d520c..d084332f 100644 --- a/test/api/input_file_test.cpp +++ b/test/api/input_file_test.cpp @@ -12,7 +12,7 @@ std::string const default_alignment_short_reads_file_path = DATADIR"paired_end_m std::string const default_alignment_long_reads_file_path = DATADIR"simulated.minimap2.hg19.coordsorted_cutoff.sam"; std::filesystem::path const empty_path{}; std::string default_vcf_sample_name{"MYSAMPLE"}; -bool const verbose = false; +bool verbose = false; constexpr int16_t default_threads = 1; std::vector const default_methods{cigar_string, split_read, read_pairs, read_depth}; constexpr int32_t default_min_length = 30; @@ -61,7 +61,6 @@ TEST(input_file, detect_junctions_in_short_read_sam_file) default_vcf_sample_name, empty_path, // empty junctions path, empty_path, // empty clusters path, - verbose, default_threads, default_methods, simple_clustering, @@ -102,7 +101,6 @@ TEST(input_file, detect_junctions_in_long_reads_sam_file) default_vcf_sample_name, empty_path, // empty junctions path, empty_path, // empty clusters path, - verbose, default_threads, default_methods, simple_clustering, @@ -217,7 +215,6 @@ TEST(input_file, long_read_sam_file_unsorted) default_vcf_sample_name, empty_path, // empty junctions path, empty_path, // empty clusters path, - verbose, default_threads, default_methods, simple_clustering, @@ -295,7 +292,6 @@ TEST(input_file, short_and_long_read_sam_file_with_different_references_lengths) default_vcf_sample_name, empty_path, // empty junctions path empty_path, // empty clusters path - verbose, default_threads, default_methods, simple_clustering,