Skip to content

Commit

Permalink
[FEATURE] Add verbose option
Browse files Browse the repository at this point in the history
Signed-off-by: Lydia Buntrock <lydia.buntrock@fu-berlin.de>
  • Loading branch information
Irallia committed Sep 16, 2021
1 parent 92cc315 commit 59a7804
Show file tree
Hide file tree
Showing 10 changed files with 62 additions and 35 deletions.
2 changes: 1 addition & 1 deletion include/iGenVar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ struct cmd_arguments
/* -b */ std::filesystem::path clusters_file_path{};
// Others:
/* -h - help - not part of the args struct */
/* -v - verbose - not implemented yet */
/* -v */ bool verbose = false;
/* -t */ int16_t threads = 1;
// Methods:
/* -d */ std::vector<detection_methods> methods{cigar_string, split_read, read_pairs, read_depth}; // default: all
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* \param[in] query_sequence - SEQ field of the SAM/BAM file
* \param[in, out] junctions - vector for storing junctions
* \param[in] min_length - minimum length of variants to detect (default 30 bp, expected to be non-negative)
* \param[in] verbose - if true, each detected junction (INS/DEL) is additionally printed to seqan3::debug_stream
*
* \details This function steps through the CIGAR string and stores junctions with their position in reference and read.
* We distinguish 4 cases of CIGAR operation characters:
Expand All @@ -36,4 +37,5 @@ void analyze_cigar(std::string const & read_name,
std::vector<seqan3::cigar> & cigar_string,
seqan3::dna5_vector const & query_sequence,
std::vector<Junction> & junctions,
int32_t const min_length);
int32_t const min_length,
bool const verbose);
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,15 @@ void retrieve_aligned_segments(std::string const & sa_string, std::vector<Aligne
* \param[in] read_name - QNAME field of the SAM/BAM file
* \param[in] min_length - minimum length of variants to detect (expected to be non-negative)
* \param[in] max_overlap - maximum overlap between alignment segments (expected to be non-negative)
* \param[in] verbose - if true, each detected junction (BND) is additionally printed to seqan3::debug_stream
*/
void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segments,
std::vector<Junction> & junctions,
seqan3::dna5_vector const & query_sequence,
std::string const & read_name,
int32_t const min_length,
int32_t const max_overlap);
int32_t const max_overlap,
bool const verbose);

/*! \brief Parse the SA tag from the SAM/BAM alignment of a chimeric/split-aligned read. Build
* [aligned_segments](\ref AlignedSegment), one for each alignment segment of the read.
Expand Down
3 changes: 3 additions & 0 deletions src/iGenVar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ void initialize_argument_parser(seqan3::argument_parser & parser, cmd_arguments
parser.add_option(args.threads, 't', "threads",
"Specify the number of decompression threads used for reading BAM files.",
seqan3::option_spec::standard);
parser.add_flag(args.verbose, 'v', "verbose",
"If you set this flag to true, we provide additional details about what iGenVar does. The detailed "
"output is printed in the standard output.");

// Options - Optional output:
parser.add_option(args.junctions_file_path, 'a', "junctions",
Expand Down
9 changes: 6 additions & 3 deletions src/modules/sv_detection_methods/analyze_cigar_method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ void analyze_cigar(std::string const & read_name,
std::vector<seqan3::cigar> & cigar_string,
seqan3::dna5_vector const & query_sequence,
std::vector<Junction> & junctions,
int32_t const min_length)
int32_t const min_length,
bool const verbose)
{
// Step through CIGAR string and store current position in reference and read
int32_t pos_ref = query_start_pos;
Expand Down Expand Up @@ -43,7 +44,8 @@ void analyze_cigar(std::string const & read_name,
inserted_bases,
tandem_dup_count,
read_name};
seqan3::debug_stream << "INS: " << new_junction << "\n";
if (verbose)
seqan3::debug_stream << "INS: " << new_junction << "\n";
junctions.push_back(std::move(new_junction));
}
pos_read += length;
Expand All @@ -58,7 +60,8 @@ void analyze_cigar(std::string const & read_name,
""_dna5,
tandem_dup_count,
read_name};
seqan3::debug_stream << "DEL: " << new_junction << "\n";
if (verbose)
seqan3::debug_stream << "DEL: " << new_junction << "\n";
junctions.push_back(std::move(new_junction));
}
pos_ref += length;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segmen
seqan3::dna5_vector const & query_sequence,
std::string const & read_name,
int32_t const min_length,
int32_t const max_overlap)
int32_t const max_overlap,
bool const verbose)
{
size_t tandem_dup_count = 0;
for (size_t i = 1; i < aligned_segments.size(); i++)
Expand Down Expand Up @@ -119,7 +120,8 @@ void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segmen
next.get_query_start());
junctions.emplace_back(mate1, mate2, inserted_bases, tandem_dup_count, read_name);
}
seqan3::debug_stream << "BND: " << junctions.back() << "\n";
if (verbose)
seqan3::debug_stream << "BND: " << junctions.back() << "\n";
}
}
}
Expand Down Expand Up @@ -148,5 +150,6 @@ void analyze_sa_tag(std::string const & query_name,
seq,
query_name,
args.min_var_length,
args.max_overlap);
args.max_overlap,
args.verbose);
}
3 changes: 2 additions & 1 deletion src/variant_detection/variant_detection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,8 @@ void detect_junctions_in_long_reads_sam_file(std::vector<Junction> & junctions,
cigar,
seq,
junctions,
args.min_var_length);
args.min_var_length,
args.verbose);
break;
case detection_methods::split_read: // Detect junctions from split read evidence (SA tag,
if (!hasFlagSupplementary(flag)) // primary alignments only)
Expand Down
22 changes: 14 additions & 8 deletions test/api/detection_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
using seqan3::operator""_cigar_operation;
using seqan3::operator""_dna5;

bool verbose = false;

/* -------- detection methods tests -------- */

// TODO (irallia): implement test cases <- (23.7.21, irallia) which cases are done / still open?
Expand All @@ -28,7 +30,7 @@ TEST(junction_detection, cigar_string_simple_del)
{
std::vector<Junction> junctions_res{};
int32_t const min_var_length = 10;
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose);

EXPECT_EQ(junctions_res.size(), 0);
}
Expand All @@ -37,7 +39,7 @@ TEST(junction_detection, cigar_string_simple_del)
{
std::vector<Junction> junctions_res{};
int32_t const min_var_length = 5;
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose);

Breakend new_breakend_1 {chromosome, 15, strand::forward};
Breakend new_breakend_2 {chromosome, 22, strand::forward};
Expand Down Expand Up @@ -71,7 +73,7 @@ TEST(junction_detection, cigar_string_del_padding)

std::vector<Junction> junctions_res{};
int32_t const min_var_length = 5;
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose);

Breakend new_breakend_1 {chromosome, 15, strand::forward};
Breakend new_breakend_2 {chromosome, 22, strand::forward};
Expand Down Expand Up @@ -103,7 +105,7 @@ TEST(junction_detection, cigar_string_simple_ins)

std::vector<Junction> junctions_res{};
int32_t const min_var_length = 5;
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose);

Breakend new_breakend_1 {chromosome, 9, strand::forward};
Breakend new_breakend_2 {chromosome, 10, strand::forward};
Expand Down Expand Up @@ -135,7 +137,7 @@ TEST(junction_detection, cigar_string_ins_hardclip)

std::vector<Junction> junctions_res{};
int32_t const min_var_length = 5;
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length, verbose);

Breakend new_breakend_1 {chromosome, 9, strand::forward};
Breakend new_breakend_2 {chromosome, 10, strand::forward};
Expand Down Expand Up @@ -270,7 +272,8 @@ TEST(junction_detection, analyze_aligned_segments)
query_sequence,
read_name,
10,
0);
0,
verbose);

Breakend new_breakend_1 {"chr1", 105, strand::forward};
Breakend new_breakend_2 {"chr2", 100, strand::forward};
Expand Down Expand Up @@ -328,7 +331,8 @@ TEST(junction_detection, analyze_aligned_segments)
query_sequence,
read_name,
20,
0);
0,
verbose);

Breakend new_breakend_1 {"chr1", 105, strand::forward};
Breakend new_breakend_2 {"chr2", 100, strand::forward};
Expand Down Expand Up @@ -379,7 +383,8 @@ TEST(junction_detection, overlapping_segments)
query_sequence,
read_name,
10,
10));
10,
verbose));

// Deletion from two overlapping alignment segments (overlap of 5bp)
Breakend new_breakend_1 {"chr1", 119, strand::forward};
Expand Down Expand Up @@ -426,6 +431,7 @@ TEST(junction_detection, analyze_sa_tag)
std::filesystem::path{}, // junctions_file_path
std::filesystem::path{}, // clusters_file_path
1, // threads
false, // verbose
std::vector<detection_methods>{cigar_string, split_read, read_pairs, read_depth},
simple_clustering,
sVirl_refinement_method,
Expand Down
5 changes: 5 additions & 0 deletions test/api/input_file_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ std::string const default_alignment_short_reads_file_path = DATADIR"paired_end_m
std::string const default_alignment_long_reads_file_path = DATADIR"simulated.minimap2.hg19.coordsorted_cutoff.sam";
std::filesystem::path const empty_path{};
std::string default_vcf_sample_name{"MYSAMPLE"};
bool const verbose = false;
constexpr int16_t default_threads = 1;
std::vector<detection_methods> const default_methods{cigar_string, split_read, read_pairs, read_depth};
constexpr int32_t default_min_length = 30;
Expand Down Expand Up @@ -60,6 +61,7 @@ TEST(input_file, detect_junctions_in_short_read_sam_file)
default_vcf_sample_name,
empty_path, // empty junctions path,
empty_path, // empty clusters path,
verbose,
default_threads,
default_methods,
simple_clustering,
Expand Down Expand Up @@ -100,6 +102,7 @@ TEST(input_file, detect_junctions_in_long_reads_sam_file)
default_vcf_sample_name,
empty_path, // empty junctions path,
empty_path, // empty clusters path,
verbose,
default_threads,
default_methods,
simple_clustering,
Expand Down Expand Up @@ -214,6 +217,7 @@ TEST(input_file, long_read_sam_file_unsorted)
default_vcf_sample_name,
empty_path, // empty junctions path,
empty_path, // empty clusters path,
verbose,
default_threads,
default_methods,
simple_clustering,
Expand Down Expand Up @@ -291,6 +295,7 @@ TEST(input_file, short_and_long_read_sam_file_with_different_references_lengths)
default_vcf_sample_name,
empty_path, // empty junctions path
empty_path, // empty clusters path
verbose,
default_threads,
default_methods,
simple_clustering,
Expand Down
36 changes: 19 additions & 17 deletions test/cli/iGenVar_cli_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ std::string const help_page_part_1
" -t, --threads (signed 16 bit integer)\n"
" Specify the number of decompression threads used for reading BAM\n"
" files. Default: 1.\n"
" -v, --verbose\n"
" If you set this flag to true, we provide additional details about\n"
" what iGenVar does. The detailed output is printed in the standard\n"
" output.\n"
};

std::string const help_page_part_2
Expand Down Expand Up @@ -162,10 +166,6 @@ std::string expected_res_empty
std::string expected_err_default_no_err
{
"Detect junctions in long reads...\n"
"INS: chr21\t41972615\tForward\tchr21\t41972616\tForward\t1681\t0\tm2257/8161/CCS\n"
"BND: chr21\t41972615\tReverse\tchr22\t17458415\tReverse\t2\t0\tm41327/11677/CCS\n"
"BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm21263/13017/CCS\n"
"BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm38637/7161/CCS\n"
"Start clustering...\n"
"Done with clustering. Found 2 junction clusters.\n"
"No refinement was selected.\n"
Expand All @@ -188,14 +188,23 @@ TEST_F(iGenVar_cli_test, no_options)
// TODO (irallia): There is an open issue about whether we want to add the verbose option: https://github.com/seqan/iGenVar/issues/20
TEST_F(iGenVar_cli_test, test_verbose_option)
{
cli_test_result result = execute_app("iGenVar", "-v");
cli_test_result result = execute_app("iGenVar", "-j", data(default_alignment_long_reads_file_path), "--verbose");
std::string expected_err
{
"[Error] Unknown option -v. In case this is meant to be a non-option/argument/parameter, please specify "
"the start of non-options with '--'. See -h/--help for program information.\n"
"Detect junctions in long reads...\n"
"INS: chr21\t41972615\tForward\tchr21\t41972616\tForward\t1681\t0\tm2257/8161/CCS\n"
"The read depth method for long reads is not yet implemented.\n"
"BND: chr21\t41972615\tReverse\tchr22\t17458415\tReverse\t2\t0\tm41327/11677/CCS\n"
"The read depth method for long reads is not yet implemented.\n"
"BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm21263/13017/CCS\n"
"The read depth method for long reads is not yet implemented.\n"
"BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm38637/7161/CCS\n"
"The read depth method for long reads is not yet implemented.\n"
"Start clustering...\n"
"Done with clustering. Found 2 junction clusters.\nNo refinement was selected.\n"
};
EXPECT_EQ(result.exit_code, 65280);
EXPECT_EQ(result.out, std::string{});
EXPECT_EQ(result.exit_code, 0);
EXPECT_EQ(result.out, expected_res_default);
EXPECT_EQ(result.err, expected_err);
}

Expand Down Expand Up @@ -351,13 +360,9 @@ TEST_F(iGenVar_cli_test, with_default_arguments)
std::string expected_err
{
"Detect junctions in long reads...\n"
"INS: chr21\t41972615\tForward\tchr21\t41972616\tForward\t1681\t0\tm2257/8161/CCS\n"
"The read depth method for long reads is not yet implemented.\n"
"BND: chr21\t41972615\tReverse\tchr22\t17458415\tReverse\t2\t0\tm41327/11677/CCS\n"
"The read depth method for long reads is not yet implemented.\n"
"BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm21263/13017/CCS\n"
"The read depth method for long reads is not yet implemented.\n"
"BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm38637/7161/CCS\n"
"The read depth method for long reads is not yet implemented.\n"
"Start clustering...\n"
"Done with clustering. Found 2 junction clusters.\n"
Expand Down Expand Up @@ -442,10 +447,6 @@ TEST_F(iGenVar_cli_test, test_direct_methods_input)
std::string expected_err
{
"Detect junctions in long reads...\n"
"INS: chr21\t41972615\tForward\tchr21\t41972616\tForward\t1681\t0\tm2257/8161/CCS\n"
"BND: chr21\t41972615\tReverse\tchr22\t17458415\tReverse\t2\t0\tm41327/11677/CCS\n"
"BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm21263/13017/CCS\n"
"BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm38637/7161/CCS\n"
"Start clustering...\n"
"Done with clustering. Found 3 junction clusters.\n"
"No refinement was selected.\n"
Expand Down Expand Up @@ -534,6 +535,7 @@ TEST_F(iGenVar_cli_test, dataset_single_end_mini_example)
{
cli_test_result result = execute_app("iGenVar",
"-j", data("single_end_mini_example.sam"),
"--verbose",
"--method cigar_string --method split_read "
"--min_var_length 8 --max_var_length 400");

Expand Down

0 comments on commit 59a7804

Please sign in to comment.