Skip to content

Commit

Permalink
[MISC] Apply 2. Review
Browse files Browse the repository at this point in the history
Signed-off-by: Lydia Buntrock <lydia.buntrock@fu-berlin.de>
  • Loading branch information
Irallia committed Sep 16, 2021
1 parent 4b0affd commit 620201b
Show file tree
Hide file tree
Showing 12 changed files with 40 additions and 55 deletions.
4 changes: 3 additions & 1 deletion include/iGenVar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#include "variant_detection/method_enums.hpp" // for enum detection_methods, clustering_methods and refinement_methods

inline bool gVerbose{false};

struct cmd_arguments
{
// Input:
Expand All @@ -16,7 +18,7 @@ struct cmd_arguments
/* -b */ std::filesystem::path clusters_file_path{};
// Others:
/* -h - help - not part of the args struct */
/* -v */ bool verbose = false;
/* -v - verbose - global variable gVerbose */
/* -t */ int16_t threads = 1;
// Methods:
/* -d */ std::vector<detection_methods> methods{cigar_string, split_read, read_pairs, read_depth}; // default: all
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,8 @@ int junction_distance(Junction const & lhs, Junction const & rhs);
*
* \param[in] junctions - a vector of junctions (needs to be sorted)
* \param[in] clustering_cutoff - distance cutoff for clustering
* \param[in] verbose - verbose option
*
* \details For the algorithms we use the library hclust.
* \see https://lionel.kr.hs-niederrhein.de/~dalitz/data/hclust/ (last access 01.06.2021).
*/
std::vector<Cluster> hierarchical_clustering_method(std::vector<Junction> const & junctions,
double clustering_cutoff,
bool const verbose);
std::vector<Cluster> hierarchical_clustering_method(std::vector<Junction> const & junctions, double clustering_cutoff);
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
* \param[in] query_sequence - SEQ field of the SAM/BAM file
* \param[in, out] junctions - vector for storing junctions
* \param[in] min_length - minimum length of variants to detect (default 30 bp, expected to be non-negative)
* \param[in] verbose - verbose option
*
* \details This function steps through the CIGAR string and stores junctions with their position in reference and read.
* We distinguish 4 cases of CIGAR operation characters:
Expand All @@ -37,5 +36,4 @@ void analyze_cigar(std::string const & read_name,
std::vector<seqan3::cigar> & cigar_string,
seqan3::dna5_vector const & query_sequence,
std::vector<Junction> & junctions,
int32_t const min_length,
bool const verbose);
int32_t const min_length);
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,13 @@ void retrieve_aligned_segments(std::string const & sa_string, std::vector<Aligne
* \param[in] read_name - QNAME field of the SAM/BAM file
* \param[in] min_length - minimum length of variants to detect (expected to be non-negative)
* \param[in] max_overlap - maximum overlap between alignment segments (expected to be non-negative)
* \param[in] verbose - verbose option
*/
void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segments,
std::vector<Junction> & junctions,
seqan3::dna5_vector const & query_sequence,
std::string const & read_name,
int32_t const min_length,
int32_t const max_overlap,
bool const verbose);
int32_t const max_overlap);

/*! \brief Parse the SA tag from the SAM/BAM alignment of a chimeric/split-aligned read. Build
* [aligned_segments](\ref AlignedSegment), one for each alignment segment of the read.
Expand Down
4 changes: 2 additions & 2 deletions src/iGenVar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ void initialize_argument_parser(seqan3::argument_parser & parser, cmd_arguments
parser.add_option(args.threads, 't', "threads",
"Specify the number of decompression threads used for reading BAM files.",
seqan3::option_spec::standard);
parser.add_flag(args.verbose, 'v', "verbose",
parser.add_flag(gVerbose, 'v', "verbose",
"If you set this flag to true, we provide additional details about what iGenVar does. The detailed "
"output is printed in the standard output.");

Expand Down Expand Up @@ -155,7 +155,7 @@ void detect_variants_in_alignment_file(cmd_arguments const & args)
clusters = simple_clustering_method(junctions);
break;
case 1: // hierarchical clustering
clusters = hierarchical_clustering_method(junctions, args.hierarchical_clustering_cutoff, args.verbose);
clusters = hierarchical_clustering_method(junctions, args.hierarchical_clustering_cutoff);
break;
case 2: // self-balancing_binary_tree,
seqan3::debug_stream << "The self-balancing binary tree clustering method is not yet implemented\n";
Expand Down
7 changes: 3 additions & 4 deletions src/modules/clustering/hierarchical_clustering_method.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "iGenVar.hpp" // for global variable gVerbose
#include "modules/clustering/hierarchical_clustering_method.hpp"

#include <limits> // for infinity
Expand Down Expand Up @@ -117,9 +118,7 @@ inline std::vector<Junction> subsample_partition(std::vector<Junction> const & p
return subsample;
}

std::vector<Cluster> hierarchical_clustering_method(std::vector<Junction> const & junctions,
double clustering_cutoff,
bool const verbose)
std::vector<Cluster> hierarchical_clustering_method(std::vector<Junction> const & junctions, double clustering_cutoff)
{
auto partitions = partition_junctions(junctions);
std::vector<Cluster> clusters{};
Expand All @@ -136,7 +135,7 @@ std::vector<Cluster> hierarchical_clustering_method(std::vector<Junction> const
}
if (partition_size > max_partition_size)
{
if (verbose)
if (gVerbose)
{
seqan3::debug_stream << "A partition exceeds the maximum size ("
<< partition_size
Expand Down
8 changes: 4 additions & 4 deletions src/modules/sv_detection_methods/analyze_cigar_method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/io/sequence_file/output.hpp>

#include "iGenVar.hpp" // for global variable gVerbose
#include "structures/breakend.hpp" // for class Breakend
#include "structures/junction.hpp" // for class Junction

Expand All @@ -15,8 +16,7 @@ void analyze_cigar(std::string const & read_name,
std::vector<seqan3::cigar> & cigar_string,
seqan3::dna5_vector const & query_sequence,
std::vector<Junction> & junctions,
int32_t const min_length,
bool const verbose)
int32_t const min_length)
{
// Step through CIGAR string and store current position in reference and read
int32_t pos_ref = query_start_pos;
Expand Down Expand Up @@ -44,7 +44,7 @@ void analyze_cigar(std::string const & read_name,
inserted_bases,
tandem_dup_count,
read_name};
if (verbose)
if (gVerbose)
seqan3::debug_stream << "INS: " << new_junction << "\n";
junctions.push_back(std::move(new_junction));
}
Expand All @@ -60,7 +60,7 @@ void analyze_cigar(std::string const & read_name,
""_dna5,
tandem_dup_count,
read_name};
if (verbose)
if (gVerbose)
seqan3::debug_stream << "DEL: " << new_junction << "\n";
junctions.push_back(std::move(new_junction));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "iGenVar.hpp" // for global variable gVerbose
#include "modules/sv_detection_methods/analyze_split_read_method.hpp"

#include <seqan3/core/debug_stream.hpp>
Expand Down Expand Up @@ -57,8 +58,7 @@ void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segmen
seqan3::dna5_vector const & query_sequence,
std::string const & read_name,
int32_t const min_length,
int32_t const max_overlap,
bool const verbose)
int32_t const max_overlap)
{
size_t tandem_dup_count = 0;
for (size_t i = 1; i < aligned_segments.size(); i++)
Expand Down Expand Up @@ -120,7 +120,7 @@ void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segmen
next.get_query_start());
junctions.emplace_back(mate1, mate2, inserted_bases, tandem_dup_count, read_name);
}
if (verbose)
if (gVerbose)
seqan3::debug_stream << "BND: " << junctions.back() << "\n";
}
}
Expand Down Expand Up @@ -150,6 +150,5 @@ void analyze_sa_tag(std::string const & query_name,
seq,
query_name,
args.min_var_length,
args.max_overlap,
args.verbose);
args.max_overlap);
}
3 changes: 1 addition & 2 deletions src/variant_detection/variant_detection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,7 @@ void detect_junctions_in_long_reads_sam_file(std::vector<Junction> & junctions,
cigar,
seq,
junctions,
args.min_var_length,
args.verbose);
args.min_var_length);
break;
case detection_methods::split_read: // Detect junctions from split read evidence (SA tag,
if (!hasFlagSupplementary(flag)) // primary alignments only)
Expand Down
21 changes: 11 additions & 10 deletions test/api/clustering_test.cpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
#include <gtest/gtest.h>

#include "iGenVar.hpp" // for global variable gVerbose
#include "modules/clustering/simple_clustering_method.hpp" // for the simple clustering method
#include "modules/clustering/hierarchical_clustering_method.hpp" // for the hierarchical clustering method
#include "structures/cluster.hpp" // for class Cluster

using seqan3::operator""_dna5;

bool verbose = true;

/* -------- clustering methods tests -------- */

std::string const chrom1 = "chr1";
Expand Down Expand Up @@ -204,7 +203,7 @@ TEST(hierarchical_clustering, partitioning)
TEST(hierarchical_clustering, strict_clustering)
{
std::vector<Junction> input_junctions = prepare_input_junctions();
std::vector<Cluster> clusters = hierarchical_clustering_method(input_junctions, 0, verbose);
std::vector<Cluster> clusters = hierarchical_clustering_method(input_junctions, 0);

// Each junction in separate cluster
std::vector<Cluster> expected_clusters
Expand Down Expand Up @@ -259,7 +258,7 @@ TEST(hierarchical_clustering, strict_clustering)
TEST(hierarchical_clustering, clustering_10)
{
std::vector<Junction> input_junctions = prepare_input_junctions();
std::vector<Cluster> clusters = hierarchical_clustering_method(input_junctions, 10, verbose);
std::vector<Cluster> clusters = hierarchical_clustering_method(input_junctions, 10);

// Distance matrix for junctions from reads 1-3 and 6-8
// 1 2 3
Expand Down Expand Up @@ -322,7 +321,7 @@ TEST(hierarchical_clustering, clustering_10)
TEST(hierarchical_clustering, clustering_15)
{
std::vector<Junction> input_junctions = prepare_input_junctions();
std::vector<Cluster> clusters = hierarchical_clustering_method(input_junctions, 15, verbose);
std::vector<Cluster> clusters = hierarchical_clustering_method(input_junctions, 15);

// Distance matrix for junctions from reads 1-3 and 6-8
// 1 2 3
Expand Down Expand Up @@ -383,7 +382,7 @@ TEST(hierarchical_clustering, clustering_15)
TEST(hierarchical_clustering, clustering_25)
{
std::vector<Junction> input_junctions = prepare_input_junctions();
std::vector<Cluster> clusters = hierarchical_clustering_method(input_junctions, 25, verbose);
std::vector<Cluster> clusters = hierarchical_clustering_method(input_junctions, 25);

// Distance matrix for junctions from reads 1-3 and 6-8
// 1 2 3
Expand Down Expand Up @@ -450,6 +449,8 @@ TEST(hierarchical_clustering, clustering_25)

TEST(hierarchical_clustering, subsampling)
{
gVerbose = true;

std::vector<Junction> input_junctions;
for (int32_t i = 0; i < 300; ++i)
{
Expand All @@ -462,7 +463,7 @@ TEST(hierarchical_clustering, subsampling)
std::sort(input_junctions.begin(), input_junctions.end());

testing::internal::CaptureStderr();
std::vector<Cluster> clusters = hierarchical_clustering_method(input_junctions, 0, verbose);
std::vector<Cluster> clusters = hierarchical_clustering_method(input_junctions, 0);

size_t num_junctions = 0;
for (Cluster const & cluster : clusters)
Expand Down Expand Up @@ -527,10 +528,10 @@ TEST(hierarchical_clustering, cluster_tandem_dup_count)
<< " unequal";
}

resulting_clusters = hierarchical_clustering_method(input_junctions, 0, verbose); // clustering_cutoff = 0
resulting_clusters = hierarchical_clustering_method(input_junctions, 0); // clustering_cutoff = 0
ASSERT_EQ(5, resulting_clusters.size());

resulting_clusters = hierarchical_clustering_method(input_junctions, 1, verbose); // clustering_cutoff = 1
resulting_clusters = hierarchical_clustering_method(input_junctions, 1); // clustering_cutoff = 1
ASSERT_EQ(expected_clusters.size(), resulting_clusters.size());

for (size_t cluster_index = 0; cluster_index < expected_clusters.size(); ++cluster_index)
Expand All @@ -540,6 +541,6 @@ TEST(hierarchical_clustering, cluster_tandem_dup_count)
<< " unequal";
}

resulting_clusters = hierarchical_clustering_method(input_junctions, 10, verbose); // clustering_cutoff = 10 (default value)
resulting_clusters = hierarchical_clustering_method(input_junctions, 10); // clustering_cutoff = 10 (default value)
ASSERT_EQ(1, resulting_clusters.size());
}
20 changes: 8 additions & 12 deletions test/api/detection_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ TEST(junction_detection, cigar_string_simple_del)
{
std::vector<Junction> junctions_res{};
int32_t const min_var_length = 10;
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose);
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);

EXPECT_EQ(junctions_res.size(), 0);
}
Expand All @@ -39,7 +39,7 @@ TEST(junction_detection, cigar_string_simple_del)
{
std::vector<Junction> junctions_res{};
int32_t const min_var_length = 5;
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose);
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);

Breakend new_breakend_1 {chromosome, 15, strand::forward};
Breakend new_breakend_2 {chromosome, 22, strand::forward};
Expand Down Expand Up @@ -73,7 +73,7 @@ TEST(junction_detection, cigar_string_del_padding)

std::vector<Junction> junctions_res{};
int32_t const min_var_length = 5;
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose);
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);

Breakend new_breakend_1 {chromosome, 15, strand::forward};
Breakend new_breakend_2 {chromosome, 22, strand::forward};
Expand Down Expand Up @@ -105,7 +105,7 @@ TEST(junction_detection, cigar_string_simple_ins)

std::vector<Junction> junctions_res{};
int32_t const min_var_length = 5;
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose);
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);

Breakend new_breakend_1 {chromosome, 9, strand::forward};
Breakend new_breakend_2 {chromosome, 10, strand::forward};
Expand Down Expand Up @@ -137,7 +137,7 @@ TEST(junction_detection, cigar_string_ins_hardclip)

std::vector<Junction> junctions_res{};
int32_t const min_var_length = 5;
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose);
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);

Breakend new_breakend_1 {chromosome, 9, strand::forward};
Breakend new_breakend_2 {chromosome, 10, strand::forward};
Expand Down Expand Up @@ -272,8 +272,7 @@ TEST(junction_detection, analyze_aligned_segments)
query_sequence,
read_name,
10,
0,
verbose);
0);

Breakend new_breakend_1 {"chr1", 105, strand::forward};
Breakend new_breakend_2 {"chr2", 100, strand::forward};
Expand Down Expand Up @@ -331,8 +330,7 @@ TEST(junction_detection, analyze_aligned_segments)
query_sequence,
read_name,
20,
0,
verbose);
0);

Breakend new_breakend_1 {"chr1", 105, strand::forward};
Breakend new_breakend_2 {"chr2", 100, strand::forward};
Expand Down Expand Up @@ -383,8 +381,7 @@ TEST(junction_detection, overlapping_segments)
query_sequence,
read_name,
10,
10,
verbose));
10));

// Deletion from two overlapping alignment segments (overlap of 5bp)
Breakend new_breakend_1 {"chr1", 119, strand::forward};
Expand Down Expand Up @@ -431,7 +428,6 @@ TEST(junction_detection, analyze_sa_tag)
std::filesystem::path{}, // junctions_file_path
std::filesystem::path{}, // clusters_file_path
1, // threads
false, // verbose
std::vector<detection_methods>{cigar_string, split_read, read_pairs, read_depth},
simple_clustering,
sVirl_refinement_method,
Expand Down

0 comments on commit 620201b

Please sign in to comment.