Skip to content

Commit

Permalink
r336: fine tuning pemerge
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Mar 7, 2013
1 parent 557d50c commit 72817b6
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 11 deletions.
6 changes: 3 additions & 3 deletions bwase.c
Expand Up @@ -163,7 +163,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
bwa_cigar_t *cigar = 0; bwa_cigar_t *cigar = 0;
uint32_t *cigar32 = 0; uint32_t *cigar32 = 0;
ubyte_t *rseq; ubyte_t *rseq;
int tle, qle, gtle, gscore, lscore; int tle, qle, gtle, gscore;
int64_t k, rb, re, rlen; int64_t k, rb, re, rlen;
int8_t mat[25]; int8_t mat[25];


Expand All @@ -176,7 +176,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen); rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen);
seq_reverse(len, seq, 0); // as we need to do left extension, we have to reverse both query and reference sequences seq_reverse(len, seq, 0); // as we need to do left extension, we have to reverse both query and reference sequences
seq_reverse(rlen, rseq, 0); seq_reverse(rlen, rseq, 0);
lscore = ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, -1, len<<1, &qle, &tle, &gtle, &gscore, 0); ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, -1, len<<1, &qle, &tle, &gtle, &gscore, 0);
if (gscore > 0) tle = gtle, qle = len; if (gscore > 0) tle = gtle, qle = len;
rb = re - tle; rlen = tle; rb = re - tle; rlen = tle;
seq_reverse(len, seq, 0); seq_reverse(len, seq, 0);
Expand All @@ -192,7 +192,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
rb = *_pos; re = rb + len + SW_BW; rb = *_pos; re = rb + len + SW_BW;
if (re > l_pac) re = l_pac; if (re > l_pac) re = l_pac;
rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen); rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen);
lscore = ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, -1, len<<1, &qle, &tle, &gtle, &gscore, 0); ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, -1, len<<1, &qle, &tle, &gtle, &gscore, 0);
if (gscore > 0) tle = gtle, qle = len; if (gscore > 0) tle = gtle, qle = len;
re = rb + tle; rlen = tle; re = rb + tle; rlen = tle;
ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension
Expand Down
2 changes: 1 addition & 1 deletion main.c
Expand Up @@ -3,7 +3,7 @@
#include "utils.h" #include "utils.h"


#ifndef PACKAGE_VERSION #ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.0-r335-beta" #define PACKAGE_VERSION "0.7.0-r336-beta"
#endif #endif


int bwa_fa2pac(int argc, char *argv[]); int bwa_fa2pac(int argc, char *argv[]);
Expand Down
14 changes: 7 additions & 7 deletions pemerge.c
Expand Up @@ -18,7 +18,7 @@ static const char *err_msg[MAX_ERR+1] = {
"pairs where the best SW alignment is not an overlap (long right end)", "pairs where the best SW alignment is not an overlap (long right end)",
"pairs with large 2nd best SW score", "pairs with large 2nd best SW score",
"pairs with gapped overlap", "pairs with gapped overlap",
"pairs where the end-to-end ungapped alignment score is not high enough", "pairs where the end-to-end alignment is inconsistent with SW",
"pairs potentially with tandem overlaps", "pairs potentially with tandem overlaps",
"pairs with high sum of errors" "pairs with high sum of errors"
}; };
Expand Down Expand Up @@ -81,18 +81,18 @@ int bwa_pemerge(const pem_opt_t *opt, mseq_t x[2], uint8_t **seq_, uint8_t **qua
int max_m, max_m2, min_l, max_l, max_l2; int max_m, max_m2, min_l, max_l, max_l2;
max_m = max_m2 = 0; max_l = max_l2 = 0; max_m = max_m2 = 0; max_l = max_l2 = 0;
min_l = x[0].s.l < x[1].s.l? x[0].s.l : x[1].s.l; min_l = x[0].s.l < x[1].s.l? x[0].s.l : x[1].s.l;
for (l = 0; l < min_l; ++l) { for (l = 1; l < min_l; ++l) {
int m = 0, o = x[0].s.l - l; int m = 0, o = x[0].s.l - l;
int a = opt->a, b = -opt->b; uint8_t *s0o = &s[0][o], *s1 = s[1];
for (i = 0; i < l; ++i) for (i = 0; i < l; ++i) // TODO: in principle, this can be done with SSE2. It is the bottleneck!
m += (s[0][o + i] == s[1][i])? a : b; m += opt->mat[(s1[i]<<2) + s1[i] + s0o[i]]; // equivalent to s[1][i]*5 + s[0][o+i]
if (m > max_m) max_m2 = max_m, max_m = m, max_l2 = max_l, max_l = l; if (m > max_m) max_m2 = max_m, max_m = m, max_l2 = max_l, max_l = l;
else if (m > max_m2) max_m2 = m, max_l2 = l; else if (m > max_m2) max_m2 = m, max_l2 = l;
} }
if (max_m < opt->T) return -6; if (max_m < opt->T || max_l != x[0].s.l - (r.tb - r.qb)) return -6;
if (max_l2 < max_l && max_m2 >= opt->T && (double)(max_m2 + (max_l - max_l2) * opt->a) / max_m >= MAX_SCORE_RATIO) if (max_l2 < max_l && max_m2 >= opt->T && (double)(max_m2 + (max_l - max_l2) * opt->a) / max_m >= MAX_SCORE_RATIO)
return -7; return -7;
//printf("*** %d,%d; %d,%d\n", max_m, max_m2, max_l, max_l2); if (max_l2 > max_l && (double)max_m2 / max_m >= MAX_SCORE_RATIO) return -7;
} }


l = x[0].s.l - (r.tb - r.qb); // length to merge l = x[0].s.l - (r.tb - r.qb); // length to merge
Expand Down

0 comments on commit 72817b6

Please sign in to comment.