From 2c0d9a1513f0200b243549a706963fd34205ead9 Mon Sep 17 00:00:00 2001 From: Tim Massingham Date: Sat, 17 Mar 2018 22:15:20 +0000 Subject: [PATCH] Scrappie seqmappy and implementation of local penalty in mapping routines --- CMakeLists.txt | 3 +- Makefile | 5 +- src/decode.c | 200 ++++++++++++++++++++------ src/decode.h | 16 +-- src/scrappie.c | 3 + src/scrappie_help.c | 4 + src/scrappie_mappy.c | 2 +- src/scrappie_seq_helpers.c | 28 ++-- src/scrappie_seq_helpers.h | 2 +- src/scrappie_seqmappy.c | 241 ++++++++++++++++++++++++++++++++ src/scrappie_squiggle.c | 2 +- src/scrappie_subcommands.c | 7 + src/scrappie_subcommands.h | 2 + src/test/test_map_to_sequence.c | 69 ++++++--- 14 files changed, 501 insertions(+), 83 deletions(-) create mode 100644 src/scrappie_seqmappy.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 2feb8dd..5d113eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,7 @@ add_library (scrappie_objects OBJECT src/decode.c src/event_detection.c src/laye set_property(TARGET scrappie_objects PROPERTY POSITION_INDEPENDENT_CODE 1) add_library (scrappie_static STATIC $) set_target_properties(scrappie_static PROPERTIES OUTPUT_NAME scrappie CLEAN_DIRECT_OUTPUT 1) -add_executable (scrappie src/scrappie.c src/scrappie_raw.c src/scrappie_events.c src/scrappie_mappy.c src/scrappie_squiggle.c src/scrappie_subcommands.c src/scrappie_help.c src/fast5_interface.c) +add_executable (scrappie src/scrappie.c src/scrappie_raw.c src/scrappie_events.c src/scrappie_mappy.c src/scrappie_seqmappy.c src/scrappie_squiggle.c src/scrappie_subcommands.c src/scrappie_help.c src/fast5_interface.c) if (BUILD_SHARED_LIB) if (APPLE) @@ -145,6 +145,7 @@ add_test(test_rawrgrgr_r94_call scrappie raw --model rgrgr_r94 ${USE_THREADS} ${ add_test(test_rawrgrgr_r95_call scrappie raw --model rgrgr_r95 ${USE_THREADS} ${READSDIR}) add_test(test_rawrnnrf_r94_call scrappie raw --model rnnrf_r94 ${USE_THREADS} ${READSDIR}) add_test(test_mappy scrappie mappy ${READSDIR}/${TESTREAD}.fa ${READSDIR}/${TESTREAD}.fast5) +add_test(test_seqmappy scrappie seqmappy ${READSDIR}/${TESTREAD}.fa ${READSDIR}/${TESTREAD}.fast5) add_test(test_squiggle scrappie squiggle ${READSDIR}/test_squiggles.fa) add_test(test_licence scrappie licence) add_test(test_licence scrappie license) diff --git a/Makefile b/Makefile index 79f63c8..6542da0 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,9 @@ buildDir ?= build releaseType ?= Release -.PHONY: all -all: ${buildDir}/scrappie +.PHONY: all scrappie +all: scrappie +scrappie: ${buildDir}/scrappie ${buildDir}: mkdir ${buildDir} diff --git a/src/decode.c b/src/decode.c index e92a8eb..58fc9e8 100644 --- a/src/decode.c +++ b/src/decode.c @@ -1231,18 +1231,22 @@ float squiggle_match_viterbi(const raw_table signal, const_scrappie_matrix param /** Viterbi score of sequence * - * Global mapping through sequence calculating scores of best path from basecall posterior + * Local-global mapping through sequence calculating scores of best path from basecall posterior + * + * Internal states are seq0 ... seq, start, end * * @param logpost Log posterior probability of state at each block. Stay is last state. * @param stay_pen Penalty for staying * @param skip_pen Penalty for skipping + * @param local_pen Penalty for local mapping (stay in start or ends state) * @param seq Sequence encoded into same history states as basecalls * @param seqlen Length of seq * @param path Viterbi path [out]. If NULL, no path is returned * * @returns score **/ -float map_to_sequence_viterbi(const_scrappie_matrix logpost, float stay_pen, float skip_pen, int const *seq, size_t seqlen, int *path){ +float map_to_sequence_viterbi(const_scrappie_matrix logpost, float stay_pen, float skip_pen, + float local_pen, int const *seq, size_t seqlen, int *path){ float logscore = NAN; RETURN_NULL_IF(NULL == logpost, logscore); RETURN_NULL_IF(NULL == seq, logscore); @@ -1251,10 +1255,14 @@ float map_to_sequence_viterbi(const_scrappie_matrix logpost, float stay_pen, flo const size_t nst = logpost->nr; const size_t STAY = nst - 1; + const size_t nseqstate = seqlen + 2; + const size_t START_STATE = seqlen; + const size_t END_STATE = seqlen + 1; + // Memory. - float * cscore = calloc(seqlen, sizeof(float)); - float * pscore = calloc(seqlen, sizeof(float)); - scrappie_imatrix traceback = make_scrappie_imatrix(seqlen, nblock); + float * cscore = calloc(nseqstate, sizeof(float)); + float * pscore = calloc(nseqstate, sizeof(float)); + scrappie_imatrix traceback = make_scrappie_imatrix(nseqstate, nblock); if(NULL == cscore || NULL == pscore){ free(pscore); free(cscore); @@ -1263,10 +1271,10 @@ float map_to_sequence_viterbi(const_scrappie_matrix logpost, float stay_pen, flo // Initialise - for(size_t pos=0 ; pos < seqlen ; pos++){ + for(size_t pos=0 ; pos < nseqstate ; pos++){ cscore[pos] = -BIG_FLOAT; } - cscore[0] = 0.0; + cscore[START_STATE] = 0.0; // Forwards Viterbi for(size_t blk=0 ; blk < nblock ; blk++){ @@ -1278,8 +1286,15 @@ float map_to_sequence_viterbi(const_scrappie_matrix logpost, float stay_pen, flo cscore = tmp; } + // Stay in start state (local penalty or stay) + cscore[START_STATE] = pscore[START_STATE] + fmaxf(-local_pen, logpost->data.f[lpoffset + STAY]); + traceback->data.f[toffset + START_STATE] = START_STATE; + // Stay in end state (local penalty or stay) + cscore[END_STATE] = pscore[END_STATE] + fmaxf(-local_pen, logpost->data.f[lpoffset + STAY]); + traceback->data.f[toffset + END_STATE] = END_STATE; + for(size_t pos=0 ; pos < seqlen ; pos++){ - // Stay + // Stay in ordinary state cscore[pos] = pscore[pos] - stay_pen + logpost->data.f[lpoffset + STAY]; traceback->data.f[toffset + pos] = pos; } @@ -1303,15 +1318,36 @@ float map_to_sequence_viterbi(const_scrappie_matrix logpost, float stay_pen, flo traceback->data.f[toffset + pos] = pos - 2; } } + + // Move directly from start to end without mapping + if(pscore[START_STATE] - local_pen > cscore[END_STATE]){ + cscore[END_STATE] = pscore[START_STATE] - local_pen; + traceback->data.f[toffset + END_STATE] = START_STATE; + } + // Move from start into sequence + if(pscore[START_STATE] + logpost->data.f[lpoffset + seq[0]] > cscore[0]){ + cscore[0] = pscore[START_STATE] + logpost->data.f[lpoffset + seq[0]]; + traceback->data.f[toffset + 0] = START_STATE; + } + // Move from sequence into end + if(pscore[seqlen - 1] - local_pen > cscore[END_STATE]){ + cscore[END_STATE] = pscore[seqlen - 1] - local_pen; + traceback->data.f[toffset + END_STATE] = seqlen - 1; + } } - logscore = cscore[seqlen - 1]; + logscore = fmaxf(cscore[seqlen - 1], cscore[END_STATE]); if(NULL != path){ - path[nblock - 1] = seqlen - 1; + path[nblock - 1] = (cscore[seqlen-1] > cscore[END_STATE]) ? (seqlen - 1) : END_STATE; for(size_t blk=nblock - 1; blk > 0 ; blk--){ const size_t toffset = blk * traceback->stride; path[blk - 1] = traceback->data.f[toffset + path[blk]]; } + for(size_t blk=0 ; blk < nblock ; blk++){ + if(START_STATE == path[blk] || END_STATE == path[blk]){ + path[blk] = -1; + } + } } traceback = free_scrappie_imatrix(traceback); @@ -1324,17 +1360,19 @@ float map_to_sequence_viterbi(const_scrappie_matrix logpost, float stay_pen, flo /** Forward score of sequence * - * Global mapping through sequence calculating sum of scores over all paths from basecall posterior + * Local-Global mapping through sequence calculating sum of scores over all paths from basecall posterior * * @param logpost Log posterior probability of state at each block. Stay is last state. * @param stay_pen Penalty for staying * @param skip_pen Penalty for skipping + * @param local_pen Penalty for local mapping (stay in start or ends state) * @param seq Sequence encoded into same history states as basecalls * @param seqlen Length of seq * * @returns score **/ -float map_to_sequence_forward(const_scrappie_matrix logpost, float stay_pen, float skip_pen, int const *seq, size_t seqlen){ +float map_to_sequence_forward(const_scrappie_matrix logpost, float stay_pen, float skip_pen, + float local_pen, int const *seq, size_t seqlen){ float logscore = NAN; RETURN_NULL_IF(NULL == logpost, logscore); RETURN_NULL_IF(NULL == seq, logscore); @@ -1343,9 +1381,13 @@ float map_to_sequence_forward(const_scrappie_matrix logpost, float stay_pen, flo const size_t nst = logpost->nr; const size_t STAY = nst - 1; - // Memeory. - float * cscore = calloc(seqlen, sizeof(float)); - float * pscore = calloc(seqlen, sizeof(float)); + const size_t nseqstate = seqlen + 2; + const size_t START_STATE = seqlen; + const size_t END_STATE = seqlen + 1; + + // Memory. + float * cscore = calloc(nseqstate, sizeof(float)); + float * pscore = calloc(nseqstate, sizeof(float)); if(NULL == cscore || NULL == pscore){ free(pscore); free(cscore); @@ -1354,10 +1396,10 @@ float map_to_sequence_forward(const_scrappie_matrix logpost, float stay_pen, flo // Initialise - for(size_t pos=0 ; pos <= seqlen ; pos++){ + for(size_t pos=0 ; pos < nseqstate ; pos++){ cscore[pos] = -BIG_FLOAT; } - cscore[0] = 0.0; + cscore[START_STATE] = 0.0; // Forwards pass for(size_t blk=0 ; blk < nblock ; blk++){ @@ -1368,24 +1410,40 @@ float map_to_sequence_forward(const_scrappie_matrix logpost, float stay_pen, flo cscore = tmp; } + // Stay in start state (local penalty or stay) + cscore[START_STATE] = pscore[START_STATE] + logsumexpf(-local_pen, logpost->data.f[lpoffset + STAY]); + // Stay in end state (local penalty or stay) + cscore[END_STATE] = pscore[END_STATE] + logsumexpf(-local_pen, logpost->data.f[lpoffset + STAY]); + for(size_t pos=0 ; pos < seqlen ; pos++){ // Stay cscore[pos] = pscore[pos] - stay_pen + logpost->data.f[lpoffset + STAY]; } - // Special case: step into first position - cscore[1] = logsumexpf(cscore[1], pscore[0] + logpost->data.f[lpoffset + seq[1]]); + for(size_t pos=1 ; pos < seqlen ; pos++){ + // Step + const size_t newstate = seq[pos]; + const float step_score = pscore[pos - 1] + logpost->data.f[lpoffset + newstate]; + cscore[pos] = logsumexpf(cscore[pos], step_score); + } + for(size_t pos=2 ; pos < seqlen ; pos++){ - // Step or skip + // skip const size_t newstate = seq[pos]; - const float max_step_or_skip_score = logsumexpf(pscore[pos - 1], pscore[pos - 2] - skip_pen) - + logpost->data.f[lpoffset + newstate]; - cscore[pos] = logsumexpf(max_step_or_skip_score, cscore[pos]); + const float skip_score = pscore[pos - 2] - skip_pen + logpost->data.f[lpoffset + newstate]; + cscore[pos] = logsumexpf(cscore[pos], skip_score); } + + // Move directly from start to end without mapping + cscore[END_STATE] = logsumexpf(cscore[END_STATE], pscore[START_STATE] - local_pen); + // Move from start into sequence + cscore[0] = logsumexpf(cscore[0], pscore[START_STATE] + logpost->data.f[lpoffset + seq[0]]); + // Move from sequence into end + cscore[END_STATE] = logsumexpf(cscore[END_STATE], pscore[seqlen - 1] - local_pen); } - logscore = cscore[seqlen - 1]; + logscore = logsumexpf(cscore[seqlen - 1], cscore[END_STATE]); free(pscore); @@ -1468,18 +1526,19 @@ bool are_bounds_sane(int const * low, int const * high, size_t nblock, size_t se /** Viterbi score of sequence, banded * - * Global mapping through sequence calculating scores of best path from basecall posterior (banded) + * Local-Global mapping through sequence calculating scores of best path from basecall posterior (banded) * * @param logpost Log posterior probability of state at each block. Stay is last state. * @param stay_pen Penalty for staying * @param skip_pen Penalty for skipping + * @param local_pen Penalty for local mapping (stay in start or ends state) * @param seq Sequence encoded into same history states as basecalls * @param seqlen Length of seq * @param poslow, poshigh Arrays of lowest and highest coordinate for each block. Low inclusive, high exclusive * * @returns score **/ -float map_to_sequence_viterbi_banded(const_scrappie_matrix logpost, float stay_pen, float skip_pen, +float map_to_sequence_viterbi_banded(const_scrappie_matrix logpost, float stay_pen, float skip_pen, float local_pen, int const *seq, size_t seqlen, int const * poslow, int const * poshigh){ float logscore = NAN; RETURN_NULL_IF(NULL == logpost, logscore); @@ -1491,13 +1550,17 @@ float map_to_sequence_viterbi_banded(const_scrappie_matrix logpost, float stay_p const size_t nst = logpost->nr; const size_t STAY = nst - 1; + const size_t nseqstate = seqlen + 2; + const size_t START_STATE = seqlen; + const size_t END_STATE = seqlen + 1; + // Verify assumptions about bounds RETURN_NULL_IF(!are_bounds_sane(poslow, poshigh, nblock, seqlen), logscore); // Memeory. First state is stay in previous position - float * cscore = calloc(seqlen, sizeof(float)); - float * pscore = calloc(seqlen, sizeof(float)); + float * cscore = calloc(nseqstate, sizeof(float)); + float * pscore = calloc(nseqstate, sizeof(float)); if(NULL == cscore || NULL == pscore){ free(pscore); free(cscore); @@ -1506,14 +1569,20 @@ float map_to_sequence_viterbi_banded(const_scrappie_matrix logpost, float stay_p // Initialise - for(size_t pos=0 ; pos <= seqlen ; pos++){ + for(size_t pos=0 ; pos < nseqstate ; pos++){ pscore[pos] = -BIG_FLOAT; cscore[pos] = -BIG_FLOAT; } + pscore[START_STATE] = 0.0; // Forwards Viterbi { // First block - cscore[0] = logpost->data.f[STAY] - stay_pen; + // Stay in start state (local penalty or stay) + cscore[START_STATE] = pscore[START_STATE] + fmaxf(-local_pen, logpost->data.f[STAY]); + // Stay in end state (local penalty or stay) + cscore[END_STATE] = pscore[END_STATE] + fmaxf(-local_pen, logpost->data.f[STAY]); + + cscore[0] = fmaxf(cscore[0], pscore[0] + logpost->data.f[STAY] - stay_pen); if(poshigh[0] > 0){ // Step const size_t stepto = seq[1]; @@ -1526,6 +1595,12 @@ float map_to_sequence_viterbi_banded(const_scrappie_matrix logpost, float stay_p const float skip_score = logpost->data.f[stepto] - skip_pen; cscore[2] = skip_score; } + // Move directly from start to end without mapping -- always allow + cscore[END_STATE] = fmaxf(cscore[END_STATE], pscore[START_STATE] - local_pen); + // Move from start into sequence -- lower bound for first block must be zero + cscore[0] = fmaxf(cscore[0], pscore[START_STATE] + logpost->data.f[seq[0]]); + // Move from sequence into end + cscore[END_STATE] = fmaxf(cscore[END_STATE], pscore[seqlen - 1] - local_pen); } for(size_t blk=1 ; blk < nblock ; blk++){ @@ -1536,6 +1611,12 @@ float map_to_sequence_viterbi_banded(const_scrappie_matrix logpost, float stay_p cscore = tmp; } + // Stay in start state (local penalty or stay) + cscore[START_STATE] = pscore[START_STATE] + fmaxf(-local_pen, logpost->data.f[lpoffset + STAY]); + // Stay in end state (local penalty or stay) + cscore[END_STATE] = pscore[END_STATE] + fmaxf(-local_pen, logpost->data.f[lpoffset + STAY]); + + for(size_t pos=poslow[blk] ; pos < poshigh[blk - 1] ; pos++){ // Stay cscore[pos] = pscore[pos] - stay_pen + logpost->data.f[lpoffset + STAY]; @@ -1559,10 +1640,19 @@ float map_to_sequence_viterbi_banded(const_scrappie_matrix logpost, float stay_p + logpost->data.f[lpoffset + skipto]; cscore[pos] = fmaxf(skip_score, cscore[pos]); } + + // Move directly from start to end without mapping -- always allow + cscore[END_STATE] = fmaxf(cscore[END_STATE], pscore[START_STATE] - local_pen); + // Move from start into sequence -- only allowed if lower bound is zero + if(0 == poslow[blk]){ + cscore[0] = fmaxf(cscore[0], pscore[START_STATE] + logpost->data.f[lpoffset + seq[0]]); + } + // Move from sequence into end + cscore[END_STATE] = fmaxf(cscore[END_STATE], pscore[seqlen - 1] - local_pen); } - logscore = cscore[seqlen - 1]; + logscore = fmax(cscore[seqlen - 1], cscore[END_STATE]); free(pscore); free(cscore); @@ -1573,11 +1663,12 @@ float map_to_sequence_viterbi_banded(const_scrappie_matrix logpost, float stay_p /** Forward score of sequence, banded * - * Global mapping through sequence calculating scores of best path from basecall posterior (banded) + * Local-Global mapping through sequence calculating scores of best path from basecall posterior (banded) * * @param logpost Log posterior probability of state at each block. Stay is last state. * @param stay_pen Penalty for staying * @param skip_pen Penalty for skipping + * @param local_pen Penalty for local mapping (stay in start or ends state) * @param seq Sequence encoded into same history states as basecalls * @param seqlen Length of seq * @param poslow, poshigh Arrays of lowest and highest coordinate for each block. Low inclusive, high exclusive @@ -1585,7 +1676,7 @@ float map_to_sequence_viterbi_banded(const_scrappie_matrix logpost, float stay_p * * @returns score **/ -float map_to_sequence_forward_banded(const_scrappie_matrix logpost, float stay_pen, float skip_pen, +float map_to_sequence_forward_banded(const_scrappie_matrix logpost, float stay_pen, float skip_pen, float local_pen, int const *seq, size_t seqlen, int const * poslow, int const * poshigh){ float logscore = NAN; RETURN_NULL_IF(NULL == logpost, logscore); @@ -1597,13 +1688,17 @@ float map_to_sequence_forward_banded(const_scrappie_matrix logpost, float stay_p const size_t nst = logpost->nr; const size_t STAY = nst - 1; + const size_t nseqstate = seqlen + 2; + const size_t START_STATE = seqlen; + const size_t END_STATE = seqlen + 1; + // Verify assumptions about bounds RETURN_NULL_IF(!are_bounds_sane(poslow, poshigh, nblock, seqlen), logscore); - // Memeory. First state is stay in previous position - float * cscore = calloc(seqlen, sizeof(float)); - float * pscore = calloc(seqlen, sizeof(float)); + // Memory. First state is stay in previous position + float * cscore = calloc(nseqstate, sizeof(float)); + float * pscore = calloc(nseqstate, sizeof(float)); if(NULL == cscore || NULL == pscore){ free(pscore); free(cscore); @@ -1612,14 +1707,20 @@ float map_to_sequence_forward_banded(const_scrappie_matrix logpost, float stay_p // Initialise - for(size_t pos=0 ; pos <= seqlen ; pos++){ + for(size_t pos=0 ; pos < nseqstate ; pos++){ pscore[pos] = -BIG_FLOAT; cscore[pos] = -BIG_FLOAT; } + pscore[START_STATE] = 0.0f; // Forwards pass { // First block - cscore[0] = logpost->data.f[STAY] - stay_pen; + // Stay in start state (local penalty or stay) + cscore[START_STATE] = pscore[START_STATE] + logsumexpf(-local_pen, logpost->data.f[STAY]); + // Stay in end state (local penalty or stay) + cscore[END_STATE] = pscore[END_STATE] + logsumexpf(-local_pen, logpost->data.f[STAY]); + + cscore[0] = logsumexpf(cscore[0], pscore[0] + logpost->data.f[STAY] - stay_pen); if(poshigh[0] > 0){ // Step const size_t stepto = seq[1]; @@ -1632,6 +1733,12 @@ float map_to_sequence_forward_banded(const_scrappie_matrix logpost, float stay_p const float skip_score = logpost->data.f[stepto] - skip_pen; cscore[2] = skip_score; } + // Move directly from start to end without mapping -- always allow + cscore[END_STATE] = logsumexpf(cscore[END_STATE], pscore[START_STATE] - local_pen); + // Move from start into sequence -- lower bound for first block must be zero + cscore[0] = logsumexpf(cscore[0], pscore[START_STATE] + logpost->data.f[seq[0]]); + // Move from sequence into end + cscore[END_STATE] = logsumexpf(cscore[END_STATE], pscore[seqlen - 1] - local_pen); } for(size_t blk=1 ; blk < nblock ; blk++){ @@ -1642,6 +1749,12 @@ float map_to_sequence_forward_banded(const_scrappie_matrix logpost, float stay_p cscore = tmp; } + // Stay in start state (local penalty or stay) + cscore[START_STATE] = pscore[START_STATE] + logsumexpf(-local_pen, logpost->data.f[lpoffset + STAY]); + // Stay in end state (local penalty or stay) + cscore[END_STATE] = pscore[END_STATE] + logsumexpf(-local_pen, logpost->data.f[lpoffset + STAY]); + + for(size_t pos=poslow[blk] ; pos < poshigh[blk - 1] ; pos++){ // Stay cscore[pos] = pscore[pos] - stay_pen + logpost->data.f[lpoffset + STAY]; @@ -1665,10 +1778,19 @@ float map_to_sequence_forward_banded(const_scrappie_matrix logpost, float stay_p + logpost->data.f[lpoffset + skipto]; cscore[pos] = logsumexpf(skip_score, cscore[pos]); } + + // Move directly from start to end without mapping -- always allow + cscore[END_STATE] = logsumexpf(cscore[END_STATE], pscore[START_STATE] - local_pen); + // Move from start into sequence -- only allowed if lower bound is zero + if(0 == poslow[blk]){ + cscore[0] = logsumexpf(cscore[0], pscore[START_STATE] + logpost->data.f[lpoffset + seq[0]]); + } + // Move from sequence into end + cscore[END_STATE] = logsumexpf(cscore[END_STATE], pscore[seqlen - 1] - local_pen); } - logscore = cscore[seqlen - 1]; + logscore = logsumexpf(cscore[seqlen - 1], cscore[END_STATE]); free(pscore); free(cscore); diff --git a/src/decode.h b/src/decode.h index 4e42dc0..1644fb0 100644 --- a/src/decode.h +++ b/src/decode.h @@ -35,15 +35,15 @@ float squiggle_match_viterbi(const raw_table signal, const_scrappie_matrix param bool are_bounds_sane(int const * low, int const * high, size_t nblock, size_t seqlen); -float map_to_sequence_viterbi(const_scrappie_matrix logpost, float stay_pen, - float skip_pen, int const *seq, size_t seqlen, int * path); -float map_to_sequence_forward(const_scrappie_matrix logpost, float stay_pen, - float skip_pen, int const *seq, size_t seqlen); -float map_to_sequence_viterbi_banded(const_scrappie_matrix logpost, float stay_pen, - float skip_pen, int const *seq, size_t seqlen, +float map_to_sequence_viterbi(const_scrappie_matrix logpost, float stay_pen, float skip_pen, + float local_pen, int const *seq, size_t seqlen, int * path); +float map_to_sequence_forward(const_scrappie_matrix logpost, float stay_pen, float skip_pen, + float local_pen, int const *seq, size_t seqlen); +float map_to_sequence_viterbi_banded(const_scrappie_matrix logpost, float stay_pen, float skip_pen, + float local_pen, int const *seq, size_t seqlen, int const * poslow, int const * poshigh); -float map_to_sequence_forward_banded(const_scrappie_matrix logpost, float stay_pen, - float skip_pen, int const *seq, size_t seqlen, +float map_to_sequence_forward_banded(const_scrappie_matrix logpost, float stay_pen, float skip_pen, + float local_pen, int const *seq, size_t seqlen, int const * poslow, int const * poshigh); diff --git a/src/scrappie.c b/src/scrappie.c index f19551a..c7c70bb 100644 --- a/src/scrappie.c +++ b/src/scrappie.c @@ -40,6 +40,9 @@ int main(int argc, char *argv[]) { case SCRAPPIE_MODE_MAPPY: ret = main_mappy(argc - 1, argv + 1); break; + case SCRAPPIE_MODE_SEQMAPPY: + ret = main_seqmappy(argc - 1, argv + 1); + break; default: ret = EXIT_FAILURE; warnx("Unrecognised subcommand %s\n", argv[1]); diff --git a/src/scrappie_help.c b/src/scrappie_help.c index 2d497e5..1353c06 100644 --- a/src/scrappie_help.c +++ b/src/scrappie_help.c @@ -69,6 +69,10 @@ int main_help(int argc, char *argv[]) { help_options[0] = argv[1]; ret = main_mappy(2, help_options); break; + case SCRAPPIE_MODE_SEQMAPPY: + help_options[0] = argv[1]; + ret = main_seqmappy(2, help_options); + break; default: ret = EXIT_FAILURE; warnx("Unrecognised subcommand %s\n", argv[1]); diff --git a/src/scrappie_mappy.c b/src/scrappie_mappy.c index f3304a7..34ecaa3 100644 --- a/src/scrappie_mappy.c +++ b/src/scrappie_mappy.c @@ -151,7 +151,7 @@ static struct argp argp = { options, parse_arg, args_doc, doc }; static scrappie_matrix sequence_to_squiggle(char const * base_seq, size_t n, bool rescale){ RETURN_NULL_IF(NULL == base_seq, NULL); - int * sequence = encode_bases_to_integers(base_seq, n); + int * sequence = encode_bases_to_integers(base_seq, n, 1); RETURN_NULL_IF(NULL == sequence, NULL); scrappie_matrix squiggle = dna_squiggle(sequence, n, rescale); diff --git a/src/scrappie_seq_helpers.c b/src/scrappie_seq_helpers.c index 141d22a..4e33a32 100644 --- a/src/scrappie_seq_helpers.c +++ b/src/scrappie_seq_helpers.c @@ -4,6 +4,8 @@ #include "scrappie_seq_helpers.h" +static int nbase = 4; + /** Converts a nucleotide base into integer * * Input may be uppercase or (optionally) lowercase. Ambiguous @@ -42,16 +44,24 @@ int base_to_int(char base, bool allow_lower){ * @returns Array [n] containing encoding of sequence or NULL if an * invalid base was encountered **/ -int * encode_bases_to_integers(char const * seq, size_t n){ - int * iseq = calloc(n, sizeof(int)); - for(size_t i=0 ; i < n ; i++){ - int ib = base_to_int(seq[i], true); - iseq[i] = ib; - if(-1 == ib){ - free(iseq); - iseq = NULL; - break; +int * encode_bases_to_integers(char const * seq, size_t n, size_t state_len){ + const size_t nstate = n - state_len + 1; + + int * iseq = calloc(nstate, sizeof(int)); + for(size_t i=0 ; i < nstate ; i++){ + int ib = 0; + for(size_t j=0 ; j < state_len ; j++){ + int newbase = base_to_int(seq[i + j], true); + if(-1 == newbase){ + free(iseq); + iseq = NULL; + break; + } + + ib *= nbase; + ib += newbase; } + iseq[i] = ib; } return iseq; diff --git a/src/scrappie_seq_helpers.h b/src/scrappie_seq_helpers.h index dbcd2c8..9340ad9 100644 --- a/src/scrappie_seq_helpers.h +++ b/src/scrappie_seq_helpers.h @@ -5,6 +5,6 @@ #include int base_to_int(char c, bool allow_lower); -int * encode_bases_to_integers(char const * seq, size_t n); +int * encode_bases_to_integers(char const * seq, size_t n, size_t state_len); #endif /* SCRAPPIE_SEQ_HELPERS */ diff --git a/src/scrappie_seqmappy.c b/src/scrappie_seqmappy.c new file mode 100644 index 0000000..f75def2 --- /dev/null +++ b/src/scrappie_seqmappy.c @@ -0,0 +1,241 @@ +#include +#include +//#include +#include +#include +#include + +#include "decode.h" +#include "fast5_interface.h" +#include "kseq.h" +#include "networks.h" +#include "scrappie_common.h" +#include "scrappie_licence.h" +#include "scrappie_seq_helpers.h" +#include "scrappie_stdlib.h" +#include "util.h" + +KSEQ_INIT(int, read) + +// Doesn't play nice with other headers, include last +#include + + +extern const char *argp_program_version; +extern const char *argp_program_bug_address; +static char doc[] = "Scrappie seqmappy (local-global)"; +static char args_doc[] = "fasta fast5"; +static struct argp_option options[] = { + {"localpen", 'l', "float", 0, "Penalty for local matching"}, + {"min_prob", 'm', "probability", 0, "Minimum bound on probability of match"}, + {"output", 'o', "filename", 0, "Write to file rather than stdout"}, + {"prefix", 'p', "string", 0, "Prefix to append to name of read"}, + {"segmentation", 3, "chunk:percentile", 0, "Chunk size and percentile for variance based segmentation"}, + {"skip", 's', "penalty", 0, "Penalty for skipping a base"}, + {"stay", 'y', "penalty", 0, "Penalty for staying"}, + {"trim", 't', "start:end", 0, "Number of samples to trim, as start:end"}, + {"licence", 10, 0, 0, "Print licensing information"}, + {"license", 11, 0, OPTION_ALIAS, "Print licensing information"}, + {0} +}; + +typedef struct { + size_t n; + char * seq; + char * name; +} scrappie_seq_t; + +struct arguments { + float local_pen; + float min_prob; + float skip_pen; + float stay_pen; + FILE * output; + char * prefix; + int trim_start; + int trim_end; + int varseg_chunk; + float varseg_thresh; + + char * fasta_file; + char * fast5_file; +}; + +static struct arguments args = { + .local_pen = 4.0f, + .min_prob = 1e-5f, + .stay_pen = 0.0f, + .skip_pen = 0.0f, + .output = NULL, + .prefix = "", + .trim_start = 200, + .trim_end = 10, + .varseg_chunk = 100, + .varseg_thresh = 0.0f, + + .fasta_file = NULL, + .fast5_file = NULL +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) { + switch (key) { + int ret = 0; + char * next_tok = NULL; + case 'l': + args.local_pen = atof(arg); + break; + case 'm': + args.min_prob = atof(arg); + break; + case 'o': + args.output = fopen(arg, "w"); + if(NULL == args.output){ + errx(EXIT_FAILURE, "Failed to open \"%s\" for output.", arg); + } + break; + case 'p': + args.prefix = arg; + break; + case 's': + args.skip_pen = atof(arg); + assert(isfinite(args.skip_pen)); + break; + case 't': + args.trim_start = atoi(strtok(arg, ":")); + next_tok = strtok(NULL, ":"); + if(NULL != next_tok){ + args.trim_end = atoi(next_tok); + } else { + args.trim_end = args.trim_start; + } + assert(args.trim_start >= 0); + assert(args.trim_end >= 0); + break; + case 'y': + args.stay_pen = atof(arg); + assert(isfinite(args.stay_pen)); + break; + case 3: + args.varseg_chunk = atoi(strtok(arg, ":")); + next_tok = strtok(NULL, ":"); + if(NULL == next_tok){ + errx(EXIT_FAILURE, "--segmentation should be of form chunk:percentile"); + } + args.varseg_thresh = atof(next_tok) / 100.0; + assert(args.varseg_chunk >= 0); + assert(args.varseg_thresh > 0.0 && args.varseg_thresh < 1.0); + break; + case 10: + case 11: + ret = fputs(scrappie_licence_text, stdout); + exit((EOF != ret) ? EXIT_SUCCESS : EXIT_FAILURE); + break; + + case ARGP_KEY_NO_ARGS: + argp_usage(state); + break; + + case ARGP_KEY_ARG: + args.fasta_file = state->argv[state->next - 1]; + if(NULL == state->argv[state->next]){ + errx(EXIT_FAILURE, "fast5 file is a required argument"); + } + args.fast5_file = state->argv[state->next]; + state->next = state->argc; + break; + + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +static struct argp argp = { options, parse_arg, args_doc, doc }; + + +static scrappie_seq_t read_sequence_from_fasta(char const * filename){ + scrappie_seq_t seq = {0, NULL, NULL}; + + FILE * fh = fopen(filename, "r"); + if(NULL == fh){ + return seq; + } + + kseq_t * kseqer = kseq_init(fileno(fh)); + if(kseq_read(kseqer) >= 0){ + char * name = calloc(kseqer->name.l + 1, sizeof(char)); + char * base_seq = calloc(kseqer->seq.l, sizeof(char)); + if(NULL == base_seq || NULL == name){ + free(base_seq); + free(name); + } else { + seq.seq = strncpy(base_seq, kseqer->seq.s, kseqer->seq.l); + seq.name = strncpy(name, kseqer->name.s, kseqer->name.l); + seq.n = kseqer->seq.l; + } + } + + kseq_destroy(kseqer); + fclose(fh); + + return seq; +} + + +int main_seqmappy(int argc, char *argv[]) { + argp_parse(&argp, argc, argv, 0, 0, NULL); + if(NULL == args.output){ + args.output = stdout; + } + + + // Open sequence file + scrappie_seq_t seq = read_sequence_from_fasta(args.fasta_file); + if(NULL == seq.seq){ + warnx("Failed to open \"%s\" for input.\n", args.fasta_file); + return EXIT_FAILURE; + } + + const size_t state_len = 5; + int * states = encode_bases_to_integers(seq.seq, seq.n, state_len); + const size_t nstate = seq.n - state_len + 1; + + // Read raw signal and normalise + raw_table rt = read_raw(args.fast5_file, true); + rt = trim_and_segment_raw(rt, args.trim_start, args.trim_end, args.varseg_chunk, args.varseg_thresh); + if(NULL == rt.raw){ + warnx("Failed to open \"%s\" for input and trim signal.\n", args.fasta_file); + return EXIT_FAILURE; + } + medmad_normalise_array(rt.raw + rt.start, rt.end - rt.start); + + + scrappie_matrix logpost = nanonet_rgrgr_r94_posterior(rt, args.min_prob, true); + if(NULL == logpost){ + warnx("Failed to calculate posterior for \"%s\" ", args.fasta_file); + return EXIT_SUCCESS; + } + + const size_t nblock = logpost->nc; + int * path = calloc(nblock, sizeof(int)); + if(NULL != path){ + float score = map_to_sequence_viterbi(logpost, args.stay_pen, args.skip_pen, args.local_pen, states, nstate, path); + + fprintf(args.output, "# %s to %s -- score %f over %zu blocks (%f per block)\n", args.fast5_file, args.fasta_file, -score, nblock, -score / nblock); + fprintf(args.output, "block\tpos\n"); + for(size_t i=0 ; i < nblock ; i++){ + const int32_t pos = path[i]; + fprintf(args.output, "%zu\t%d\n", i, pos); + } + + free(path); + } + + + free(states); + logpost = free_scrappie_matrix(logpost); + free(seq.seq); + free(seq.name); + + return EXIT_SUCCESS; +} diff --git a/src/scrappie_squiggle.c b/src/scrappie_squiggle.c index 6852d2a..27173e8 100644 --- a/src/scrappie_squiggle.c +++ b/src/scrappie_squiggle.c @@ -99,7 +99,7 @@ static struct argp argp = { options, parse_arg, args_doc, doc }; static scrappie_matrix sequence_to_squiggle(char const * base_seq, size_t n, bool rescale){ RETURN_NULL_IF(NULL == base_seq, NULL); - int * sequence = encode_bases_to_integers(base_seq, n); + int * sequence = encode_bases_to_integers(base_seq, n, 1); RETURN_NULL_IF(NULL == sequence, NULL); scrappie_matrix squiggle = dna_squiggle(sequence, n, rescale); diff --git a/src/scrappie_subcommands.c b/src/scrappie_subcommands.c index 9279f37..9ba5d9f 100644 --- a/src/scrappie_subcommands.c +++ b/src/scrappie_subcommands.c @@ -25,6 +25,9 @@ enum scrappie_mode get_scrappie_mode(const char *modestr) { if (0 == strcmp(modestr, "mappy")){ return SCRAPPIE_MODE_MAPPY; } + if (0 == strcmp(modestr, "seqmappy")){ + return SCRAPPIE_MODE_SEQMAPPY; + } return SCRAPPIE_MODE_INVALID; } @@ -45,6 +48,8 @@ const char *scrappie_mode_string(const enum scrappie_mode mode) { return "squiggle"; case SCRAPPIE_MODE_MAPPY: return "mappy"; + case SCRAPPIE_MODE_SEQMAPPY: + return "seqmappy"; case SCRAPPIE_MODE_INVALID: errx(EXIT_FAILURE, "Invalid scrappie mode\n"); default: @@ -70,6 +75,8 @@ const char *scrappie_mode_description(const enum scrappie_mode mode) { return "Create approximate squiggle for sequence"; case SCRAPPIE_MODE_MAPPY: return "Map signal to approximate squiggle"; + case SCRAPPIE_MODE_SEQMAPPY: + return "Map signal to sequence via basecall posteriors"; case SCRAPPIE_MODE_INVALID: errx(EXIT_FAILURE, "Invalid scrappie mode\n"); default: diff --git a/src/scrappie_subcommands.h b/src/scrappie_subcommands.h index 344e33c..d1cea61 100644 --- a/src/scrappie_subcommands.h +++ b/src/scrappie_subcommands.h @@ -14,6 +14,7 @@ enum scrappie_mode {SCRAPPIE_MODE_EVENTS = 0, SCRAPPIE_MODE_VERSION, SCRAPPIE_MODE_SQUIGGLE, SCRAPPIE_MODE_MAPPY, + SCRAPPIE_MODE_SEQMAPPY, SCRAPPIE_MODE_INVALID }; enum scrappie_mode get_scrappie_mode(const char *modestr); @@ -28,6 +29,7 @@ int main_help_short(void); int main_licence(int argc, char *argv[]); int main_mappy(int argc, char * argv[]); int main_raw(int argc, char *argv[]); +int main_seqmappy(int argc, char * argv[]); int main_squiggle(int argc, char * argv[]); int main_version(int argc, char *argv[]); diff --git a/src/test/test_map_to_sequence.c b/src/test/test_map_to_sequence.c index 62b910f..99334bc 100644 --- a/src/test/test_map_to_sequence.c +++ b/src/test/test_map_to_sequence.c @@ -10,6 +10,7 @@ #include static const char posteriorfile[] = "posterior_trimmed.crp"; +static float BIG_VAL = 1.e30f; static const int32_t mapping_target[] = { 332, 306, 202, 811, 174, 699, 749, 949, 725, 854, @@ -136,7 +137,7 @@ void test_viterbi_map_to_sequence(void){ CU_ASSERT_PTR_NOT_NULL_FATAL(logpost); log_activation_inplace(logpost); - float score = map_to_sequence_viterbi(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len, NULL); + float score = map_to_sequence_viterbi(logpost, 0.0f, 0.0f, BIG_VAL, mapping_target, mapping_target_len, NULL); CU_ASSERT_NOT_EQUAL(score, NAN); logpost = free_scrappie_matrix(logpost); @@ -147,7 +148,7 @@ void test_forward_map_to_sequence(void){ CU_ASSERT_PTR_NOT_NULL_FATAL(logpost); log_activation_inplace(logpost); - float score = map_to_sequence_forward(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len); + float score = map_to_sequence_forward(logpost, 0.0f, 0.0f, BIG_VAL, mapping_target, mapping_target_len); CU_ASSERT_NOT_EQUAL(score, NAN); logpost = free_scrappie_matrix(logpost); @@ -162,17 +163,17 @@ void test_viterbi_with_path_map_to_sequence(void){ int * path = calloc(nblock, sizeof(int)); CU_ASSERT_PTR_NOT_NULL_FATAL(path); - float score = map_to_sequence_viterbi(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len, path); + float score = map_to_sequence_viterbi(logpost, 0.0f, 0.0f, BIG_VAL, mapping_target, mapping_target_len, path); CU_ASSERT_NOT_EQUAL(score, NAN); - /*fputs("Viterbi mapping path",stdout); + fputs("Viterbi mapping path\n",stdout); fprintf(stdout,"%d",path[0]); for(size_t blk=1 ; blk < nblock ; blk++){ printf(", %d", path[blk]); if((blk + 1) % 20 == 0){ fputc('\n', stdout); } - }*/ + } free(path); logpost = free_scrappie_matrix(logpost); @@ -183,8 +184,9 @@ void test_forward_better_than_viterbi_map_to_sequence(void){ CU_ASSERT_PTR_NOT_NULL_FATAL(logpost); log_activation_inplace(logpost); - float score_vit = map_to_sequence_viterbi(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len, NULL); - float score_fwd = map_to_sequence_forward(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len); + float score_vit = map_to_sequence_viterbi(logpost, 0.0f, 0.0f, BIG_VAL, mapping_target, mapping_target_len, NULL); + float score_fwd = map_to_sequence_forward(logpost, 0.0f, 0.0f, BIG_VAL, mapping_target, mapping_target_len); + printf("vit = %f fwd = %f\n", score_vit, score_fwd); CU_ASSERT_TRUE(score_fwd >= score_vit); logpost = free_scrappie_matrix(logpost); @@ -207,8 +209,9 @@ void test_full_band_viterbi_map_to_sequence(void){ poshigh[i] = mapping_target_len; } - float score_vit = map_to_sequence_viterbi(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len, NULL); - float score_vitB = map_to_sequence_viterbi_banded(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len, poslow, poshigh); + float score_vit = map_to_sequence_viterbi(logpost, 0.0f, 0.0f, BIG_VAL, mapping_target, mapping_target_len, NULL); + float score_vitB = map_to_sequence_viterbi_banded(logpost, 0.0f, 0.0f, BIG_VAL, mapping_target, mapping_target_len, poslow, poshigh); + printf("vit = %f vitB = %f\n", score_vit, score_vitB); CU_ASSERT_DOUBLE_EQUAL(score_vit, score_vitB, 1e-3); free(poshigh); @@ -233,8 +236,9 @@ void test_full_band_forward_map_to_sequence(void){ poshigh[i] = mapping_target_len; } - float score_fwd = map_to_sequence_forward(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len); - float score_fwdB = map_to_sequence_forward_banded(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len, poslow, poshigh); + float score_fwd = map_to_sequence_forward(logpost, 0.0f, 0.0f, BIG_VAL, mapping_target, mapping_target_len); + float score_fwdB = map_to_sequence_forward_banded(logpost, 0.0f, 0.0f, BIG_VAL, mapping_target, mapping_target_len, poslow, poshigh); + printf("fwd = %f fwdB = %f\n", score_fwd, score_fwdB); CU_ASSERT_DOUBLE_EQUAL(score_fwd, score_fwdB, 1e-3); free(poshigh); @@ -243,7 +247,7 @@ void test_full_band_forward_map_to_sequence(void){ } -void test_relaxed_band_helper(int32_t band){ +void test_relaxed_band_helper(float local_pen, int32_t band){ scrappie_matrix logpost = read_scrappie_matrix(posteriorfile); CU_ASSERT_PTR_NOT_NULL_FATAL(logpost); @@ -261,8 +265,9 @@ void test_relaxed_band_helper(int32_t band){ poshigh[i] = imin(tightlow[i] + band, mapping_target_len); } - float score_vit = map_to_sequence_viterbi(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len, NULL); - float score_vitB = map_to_sequence_viterbi_banded(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len, poslow, poshigh); + float score_vit = map_to_sequence_viterbi(logpost, 0.0f, 0.0f, local_pen, mapping_target, mapping_target_len, NULL); + float score_vitB = map_to_sequence_viterbi_banded(logpost, 0.0f, 0.0f, local_pen, mapping_target, mapping_target_len, poslow, poshigh); + printf("vit = %f vitB = %f\n", score_vit, score_vitB); CU_ASSERT_DOUBLE_EQUAL(score_vit, score_vitB, 1e-3); free(poshigh); @@ -272,19 +277,36 @@ void test_relaxed_band_helper(int32_t band){ void test_relaxed_band2_viterbi_map_to_sequence(void){ - test_relaxed_band_helper(2); + test_relaxed_band_helper(BIG_VAL, 2); } void test_relaxed_band3_viterbi_map_to_sequence(void){ - test_relaxed_band_helper(3); + test_relaxed_band_helper(BIG_VAL, 3); } void test_relaxed_band4_viterbi_map_to_sequence(void){ - test_relaxed_band_helper(4); + test_relaxed_band_helper(BIG_VAL, 4); } void test_relaxed_band5_viterbi_map_to_sequence(void){ - test_relaxed_band_helper(5); + test_relaxed_band_helper(BIG_VAL, 5); +} + +static const float test_local_pen = 0.5f; +void test_relaxed_band2_with_local_viterbi_map_to_sequence(void){ + test_relaxed_band_helper(test_local_pen, 2); +} + +void test_relaxed_band3_with_local_viterbi_map_to_sequence(void){ + test_relaxed_band_helper(test_local_pen, 3); +} + +void test_relaxed_band4_with_local_viterbi_map_to_sequence(void){ + test_relaxed_band_helper(test_local_pen, 4); +} + +void test_relaxed_band5_with_local_viterbi_map_to_sequence(void){ + test_relaxed_band_helper(test_local_pen, 5); } void test_tight_band_forward_map_to_sequence(void){ @@ -302,10 +324,11 @@ void test_tight_band_forward_map_to_sequence(void){ tighthigh[i] = tightlow[i] - 1; } - float score_fwd = map_to_sequence_forward(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len); - float score_fwdB = map_to_sequence_forward_banded(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len, + float score_fwd = map_to_sequence_forward(logpost, 0.0f, 0.0f, BIG_VAL, mapping_target, mapping_target_len); + float score_fwdB = map_to_sequence_forward_banded(logpost, 0.0f, 0.0f, BIG_VAL, mapping_target, mapping_target_len, tightlow, tighthigh); - float score_vit = map_to_sequence_viterbi(logpost, 0.0f, 0.0f, mapping_target, mapping_target_len, NULL); + float score_vit = map_to_sequence_viterbi(logpost, 0.0f, 0.0f, BIG_VAL, mapping_target, mapping_target_len, NULL); + printf("vit = %f fwd = %f fwdB = %f\n", score_vit, score_fwd, score_fwdB); CU_ASSERT_DOUBLE_EQUAL(score_fwdB, score_vit, 1e-3); CU_ASSERT_TRUE(score_fwd > score_fwdB); @@ -330,6 +353,10 @@ static test_with_description tests[] = { {"Test mapping -- Viterbi with relaxed band3 equals Viterbi", test_relaxed_band3_viterbi_map_to_sequence}, {"Test mapping -- Viterbi with relaxed band4 equals Viterbi", test_relaxed_band4_viterbi_map_to_sequence}, {"Test mapping -- Viterbi with relaxed band5 equals Viterbi", test_relaxed_band5_viterbi_map_to_sequence}, + {"Test mapping -- Viterbi with relaxed band2 equals Viterbi, local pen", test_relaxed_band2_with_local_viterbi_map_to_sequence}, + {"Test mapping -- Viterbi with relaxed band3 equals Viterbi, local pen", test_relaxed_band3_with_local_viterbi_map_to_sequence}, + {"Test mapping -- Viterbi with relaxed band4 equals Viterbi, local pen", test_relaxed_band4_with_local_viterbi_map_to_sequence}, + {"Test mapping -- Viterbi with relaxed band5 equals Viterbi, local pen", test_relaxed_band5_with_local_viterbi_map_to_sequence}, {0}}; /** Register tests with CUnit