From 8a4697f87895a6777e8a536d380ad021de65cf24 Mon Sep 17 00:00:00 2001 From: Thomas Mailund Date: Wed, 20 Feb 2019 09:18:41 +0100 Subject: [PATCH] Changed the BWT interface I simplified the interface to the BWT code by putting references to the suffix array and the remap table in the bwt_table structure. --- stralg/bwt.c | 46 +++++++++++++++------------------------ stralg/bwt.h | 2 -- tests/approx_match_test.c | 2 +- tests/match_test.c | 8 ++----- 4 files changed, 20 insertions(+), 38 deletions(-) diff --git a/stralg/bwt.c b/stralg/bwt.c index dbf87b3..49776e6 100644 --- a/stralg/bwt.c +++ b/stralg/bwt.c @@ -108,7 +108,7 @@ void dealloc_bwt_exact_match_iter(struct bwt_exact_match_iter *iter) struct bwt_approx_internal_match { const char *cigar; size_t match_length; - struct suffix_array *sa; + const struct suffix_array *sa; size_t L; size_t R; }; @@ -125,12 +125,8 @@ struct bwt_approx_frame { size_t R; }; struct bwt_approx_match_internal_iter { - struct suffix_array *sa; - struct bwt_table *bwt_table; - struct remap_table *remap_table; - + struct bwt_table *bwt_table; struct bwt_approx_frame sentinel; - const char *remapped_pattern; char *full_cigar_buf; char *cigar_buf; @@ -138,11 +134,9 @@ struct bwt_approx_match_internal_iter { void init_bwt_approx_match_internal_iter (struct bwt_approx_match_internal_iter *iter, - struct bwt_table *bwt_table, - struct suffix_array *sa, - struct remap_table *remap_table, - const char *remapped_pattern, - int edits); + struct bwt_table *bwt_table, + const char *remapped_pattern, + int edits); bool next_bwt_approx_match_internal_iter (struct bwt_approx_match_internal_iter *iter, struct bwt_approx_internal_match *match); @@ -210,6 +204,7 @@ static void push_edits(struct bwt_approx_match_internal_iter *iter, struct bwt_table *bwt_table = iter->bwt_table; uint32_t *c_table = bwt_table->c_table; uint32_t *o_table = bwt_table->o_table; + const struct remap_table *remap_table = iter->bwt_table->remap_table; size_t new_L; size_t new_R; @@ -217,7 +212,7 @@ static void push_edits(struct bwt_approx_match_internal_iter *iter, // M-operations unsigned char match_a = iter->remapped_pattern[i]; // Iterating alphabet from 1 so I don't include the sentinel. - for (unsigned char a = 1; a < iter->remap_table->alphabet_size; ++a) { + for (unsigned char a = 1; a < remap_table->alphabet_size; ++a) { size_t o_contrib = (L == 0) ? 0 : o_table[o_index(a, L - 1, bwt_table)]; new_L = c_table[a] + o_contrib + 1; new_R = c_table[a] + o_table[o_index(a, R, bwt_table)]; @@ -242,7 +237,7 @@ static void push_edits(struct bwt_approx_match_internal_iter *iter, // D-operation if (!first) { // never start with a deletion // Iterating alphabet from 1 so I don't include the sentinel. - for (unsigned char a = 1; a < iter->remap_table->alphabet_size; ++a) { + for (unsigned char a = 1; a < remap_table->alphabet_size; ++a) { size_t o_contrib = (L == 0) ? 0 : o_table[o_index(a, L - 1, bwt_table)]; new_L = c_table[a] + o_contrib + 1; new_R = c_table[a] + o_table[o_index(a, R, bwt_table)]; @@ -283,16 +278,11 @@ static void pop_edits(struct bwt_approx_match_internal_iter *iter, } -void init_bwt_approx_match_internal_iter (struct bwt_approx_match_internal_iter *iter, - struct bwt_table *bwt_table, - struct suffix_array *sa, - struct remap_table *remap_table, - const char *p, - int edits) +void init_bwt_approx_match_internal_iter + (struct bwt_approx_match_internal_iter *iter, + struct bwt_table *bwt_table, const char *p, int edits) { - iter->sa = sa; iter->bwt_table = bwt_table; - iter->remap_table = remap_table; iter->remapped_pattern = p; // one edit can max cost four characters @@ -308,7 +298,7 @@ void init_bwt_approx_match_internal_iter (struct bwt_approx_match_internal_ite printf("\n"); #endif - size_t n = iter->sa->length; + size_t n = iter->bwt_table->sa->length; size_t m = strlen(p); size_t L = 0; @@ -320,8 +310,9 @@ void init_bwt_approx_match_internal_iter (struct bwt_approx_match_internal_ite iter->full_cigar_buf, 0, edits, L, R, i); } -bool next_bwt_approx_match_internal_iter (struct bwt_approx_match_internal_iter *iter, - struct bwt_approx_internal_match *res) +bool next_bwt_approx_match_internal_iter + (struct bwt_approx_match_internal_iter *iter, + struct bwt_approx_internal_match *res) { char edit_op; int edits; @@ -355,7 +346,7 @@ bool next_bwt_approx_match_internal_iter (struct bwt_approx_match_internal_ite res->cigar = iter->cigar_buf; res->match_length = match_length; - res->sa = iter->sa; + res->sa = iter->bwt_table->sa; res->L = L; res->R = R; @@ -389,14 +380,11 @@ void init_bwt_exact_match_from_approx_match(const struct bwt_approx_internal_mat // it for all exact matches. void init_bwt_approx_iter(struct bwt_approx_iter *iter, struct bwt_table *bwt_table, - struct suffix_array *sa, - struct remap_table *remap_table, const char *remapped_pattern, int edits) { iter->internal_approx_iter = malloc(sizeof(struct bwt_approx_match_internal_iter)); - init_bwt_approx_match_internal_iter(iter->internal_approx_iter, bwt_table, sa, - remap_table, remapped_pattern, edits); + init_bwt_approx_match_internal_iter(iter->internal_approx_iter, bwt_table, remapped_pattern, edits); iter->internal_exact_iter = malloc(sizeof(struct bwt_exact_match_iter)); init_bwt_exact_match_iter(iter->internal_exact_iter, bwt_table, remapped_pattern); iter->outer = true; diff --git a/stralg/bwt.h b/stralg/bwt.h index 739e701..becc37e 100644 --- a/stralg/bwt.h +++ b/stralg/bwt.h @@ -56,8 +56,6 @@ struct bwt_approx_match { }; void init_bwt_approx_iter(struct bwt_approx_iter *iter, struct bwt_table *bwt_table, - struct suffix_array *sa, - struct remap_table *remap_table, const char *remapped_pattern, int edits); bool next_bwt_approx_match(struct bwt_approx_iter *iter, diff --git a/tests/approx_match_test.c b/tests/approx_match_test.c index d09c278..5d24af0 100644 --- a/tests/approx_match_test.c +++ b/tests/approx_match_test.c @@ -228,7 +228,7 @@ static void bwt_match(struct suffix_array *sa, struct bwt_approx_iter iter; struct bwt_approx_match match; - init_bwt_approx_iter(&iter, bwt_table, sa, remap_table, pattern, edits); + init_bwt_approx_iter(&iter, bwt_table, pattern, edits); while (next_bwt_approx_match(&iter, &match)) { rev_remap_between0(rev_mapped_match, string + match.position, diff --git a/tests/match_test.c b/tests/match_test.c index d326f0a..45355d0 100644 --- a/tests/match_test.c +++ b/tests/match_test.c @@ -157,9 +157,6 @@ static void general_match_test(const char *pattern, } static void bwt_match(index_vector *naive, - // the original pattern and string parameters - // are here for debugging. - const char *pattern, const char *string, struct remap_table *remap_table, char *remapped_pattern, char *remapped_string) { @@ -174,7 +171,7 @@ static void bwt_match(index_vector *naive, struct bwt_exact_match_iter bwt_iter; struct bwt_exact_match bwt_match; - init_bwt_exact_match_iter(&bwt_iter, &bwt_table, sa, remapped_pattern); + init_bwt_exact_match_iter(&bwt_iter, &bwt_table, remapped_pattern); while (next_bwt_exact_match_iter(&bwt_iter, &bwt_match)) { index_vector_append(&bwt, bwt_match.pos); } @@ -226,8 +223,7 @@ static void remap_match_test(const char *pattern, simple_exact_matchers(&naive, remapped_pattern, remapped_string); general_suffix_test(&naive, remapped_pattern, remapped_string); - bwt_match(&naive, pattern, string, &remap_table, - remapped_pattern, remapped_string); + bwt_match(&naive, &remap_table, remapped_pattern, remapped_string); dealloc_remap_table(&remap_table); dealloc_index_vector(&naive);