Permalink
Browse files

generate multiple alignments from one chain

  • Loading branch information...
lh3 committed Feb 21, 2013
1 parent cfbc4c8 commit a578688fa80212b800f6f82842269095dba22f4e
Showing with 27 additions and 33 deletions.
  1. +20 −26 bwamem.c
  2. +1 −1 bwamem.h
  3. +6 −6 kvec.h
View
@@ -415,16 +415,14 @@ static inline int cal_max_gap(const mem_opt_t *opt, int qlen)
return l > 1? l : 1;
}
-void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_t *a)
+void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av)
{ // FIXME: in general, we SHOULD check funny seed patterns such as contained seeds. When that happens, we should use a SW or extend more seeds
- int i, k, csub = 0;
+ int i, k;
int64_t rlen, rmax[2], tmp, max = 0, max_i = 0;
const mem_seed_t *s;
uint8_t *rseq = 0;
- mem_alnreg_t best;
- memset(&best, 0, sizeof(mem_alnreg_t));
- memset(a, 0, sizeof(mem_alnreg_t));
+ av->n = 0;
// get the max possible span
rmax[0] = l_pac<<1; rmax[1] = 0;
for (i = 0; i < c->n; ++i) {
@@ -441,6 +439,8 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
if (rlen != rmax[1] - rmax[0]) return;
for (k = 0; k < c->n;) {
+ mem_alnreg_t *a;
+ a = kv_pushp(mem_alnreg_t, *av);
s = &c->seeds[k];
memset(a, 0, sizeof(mem_alnreg_t));
if (s->qbeg) { // left extension
@@ -463,28 +463,22 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, opt->w, a->score, &qle, &tle);
a->qe = qe + qle; a->re = rmax[0] + re + tle;
} else a->qe = l_query, a->re = s->rbeg + s->len;
- if (a->score >= best.score) csub = best.score, best = *a;
if (mem_verbose >= 4) printf("[%d] score=%d\t[%d,%d) <=> [%ld,%ld)\n", k, a->score, a->qb, a->qe, (long)a->rb, (long)a->re);
- // jump to the next seed that: 1) has no overlap with the previous seed; 2) is not fully contained in the alignment
+ // compute seedcov
+ for (i = 0, a->seedcov = 0; i < c->n; ++i) {
+ const mem_seed_t *t = &c->seeds[i];
+ if (t->qbeg >= a->qb && t->qbeg + t->len <= a->qe && t->rbeg >= a->rb && t->rbeg + t->len <= a->re) // seed fully contained
+ a->seedcov += t->len; // this is not very accurate, but for approx. mapQ, this is good enough
+ }
+ // jump to the next seed that: 1) has no overlap with the previous seed, or 2) is not fully contained in the alignment
for (i = k + 1; i < c->n; ++i) {
const mem_seed_t *t = &c->seeds[i];
if ((t-1)->rbeg + (t-1)->len >= t->rbeg || (t-1)->qbeg + (t-1)->len >= t->qbeg) break;
if (t->rbeg + t->len > a->re || t->qbeg + t->len > a->qe) break;
}
k = i;
}
- if (a->score < best.score) *a = best;
- a->csub = csub;
free(rseq);
-
- // compute seedcov
- if (c->n > 1) {
- for (i = 0, a->seedcov = 0; i < c->n; ++i) {
- s = &c->seeds[i];
- if (s->qbeg >= a->qb && s->qbeg + s->len <= a->qe && s->rbeg >= a->rb && s->rbeg + s->len <= a->re) // seed fully contained
- a->seedcov += s->len; // this is not very accurate, but for approx. mapQ, this is good enough
- }
- } else a->seedcov = c->seeds[0].len;
}
/*****************************
@@ -650,21 +644,23 @@ void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, b
static mem_alnreg_v find_alnreg(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s)
{
- int i;
+ int i, j;
mem_chain_v chn;
- mem_alnreg_v regs;
+ mem_alnreg_v regs, tmp;
for (i = 0; i < s->l_seq; ++i)
s->seq[i] = nst_nt4_table[(int)s->seq[i]];
chn = mem_chain(opt, bwt, s->l_seq, (uint8_t*)s->seq);
chn.n = mem_chain_flt(opt, chn.n, chn.a);
if (mem_verbose >= 4) mem_print_chain(bns, &chn);
- regs.n = regs.m = chn.n;
- regs.a = malloc(regs.n * sizeof(mem_alnreg_t));
+ kv_init(regs); kv_init(tmp);
for (i = 0; i < chn.n; ++i) {
- mem_chain2aln(opt, bns->l_pac, pac, s->l_seq, (uint8_t*)s->seq, &chn.a[i], &regs.a[i]);
+ mem_chain2aln(opt, bns->l_pac, pac, s->l_seq, (uint8_t*)s->seq, &chn.a[i], &tmp);
+ for (j = 0; j < tmp.n; ++j)
+ kv_push(mem_alnreg_t, regs, tmp.a[j]);
free(chn.a[i].seeds);
}
free(chn.a);
+ regs.n = mem_sort_and_dedup(regs.n, regs.a);
return regs;
}
@@ -683,10 +679,8 @@ static void *worker1(void *data)
{
worker_t *w = (worker_t*)data;
int i;
- for (i = w->start; i < w->n; i += w->step) {
+ for (i = w->start; i < w->n; i += w->step)
w->regs[i] = find_alnreg(w->opt, w->bwt, w->bns, w->pac, &w->seqs[i]);
- w->regs[i].n = mem_sort_and_dedup(w->regs[i].n, w->regs[i].a);
- }
return 0;
}
View
@@ -80,7 +80,7 @@ void mem_fill_scmat(int a, int b, int8_t mat[25]);
mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq);
int mem_chain_flt(const mem_opt_t *opt, int n_chn, mem_chain_t *chains);
-void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_t *a);
+void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *a);
uint32_t *mem_gen_cigar(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar);
int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs);
View
12 kvec.h
@@ -1,6 +1,6 @@
/* The MIT License
- Copyright (c) 2008, by Attractive Chaos <attractivechaos@aol.co.uk>
+ Copyright (c) 2008, by Attractive Chaos <attractor@live.co.uk>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -76,15 +76,15 @@ int main() {
(v).a[(v).n++] = (x); \
} while (0)
-#define kv_pushp(type, v) (((v).n == (v).m)? \
+#define kv_pushp(type, v) ((((v).n == (v).m)? \
((v).m = ((v).m? (v).m<<1 : 2), \
(v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
- : 0), ((v).a + ((v).n++))
+ : 0), &(v).a[(v).n++])
-#define kv_a(type, v, i) ((v).m <= (size_t)(i)? \
+#define kv_a(type, v, i) (((v).m <= (size_t)(i)? \
((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \
(v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
- : (v).n <= (size_t)(i)? (v).n = (i) \
- : 0), (v).a[(i)]
+ : (v).n <= (size_t)(i)? (v).n = (i) + 1 \
+ : 0), (v).a[(i)])
#endif

0 comments on commit a578688

Please sign in to comment.