Permalink
Browse files

r329: ditch stdaln.{c,h}; no changes to bwa-mem

stdaln.{c,h} was written ten years ago. Its local and SW extension code are
actually buggy (though that rarely happens and usually does not affect the
results too much). ksw.{c,h} is more concise, potentially faster, less buggy,
and richer in features.
  • Loading branch information...
1 parent bb37e14 commit 98f896675094c3bb12203717f29b45757e5fd056 @lh3 committed Mar 5, 2013
Showing with 33 additions and 1,281 deletions.
  1. +3 −3 Makefile
  2. +21 −20 bwape.c
  3. +3 −3 bwase.c
  4. +0 −15 bwtaln.c
  5. +5 −7 bwtaln.h
  6. +1 −1 main.c
  7. +0 −1,070 stdaln.c
  8. +0 −162 stdaln.h
View
@@ -4,7 +4,7 @@ CXXFLAGS= $(CFLAGS)
AR= ar
DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o
-AOBJS= QSufSort.o bwt_gen.o stdaln.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
+AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
is.o bwtindex.o bwape.o kopen.o \
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
@@ -48,8 +48,8 @@ fastmap.o:bwt.h bwamem.h
bwtaln.o:bwt.h bwtaln.h kseq.h
bwtgap.o:bwtgap.h bwtaln.h bwt.h
-bwtsw2_core.o:bwtsw2.h bwt.h bwt_lite.h stdaln.h
-bwtsw2_aux.o:bwtsw2.h bwt.h bwt_lite.h stdaln.h
+bwtsw2_core.o:bwtsw2.h bwt.h bwt_lite.h
+bwtsw2_aux.o:bwtsw2.h bwt.h bwt_lite.h
bwtsw2_main.o:bwtsw2.h
clean:
View
@@ -8,9 +8,9 @@
#include "kvec.h"
#include "bntseq.h"
#include "utils.h"
-#include "stdaln.h"
#include "bwase.h"
#include "bwa.h"
+#include "ksw.h"
typedef struct {
int n;
@@ -397,16 +397,17 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw
#define SW_MIN_MAPQ 17
// cnt = n_mm<<16 | n_gapo<<8 | n_gape
-bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, int64_t *beg, int reglen,
- int *n_cigar, uint32_t *_cnt)
+bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, int64_t *beg, int reglen, int *n_cigar, uint32_t *_cnt)
{
+ kswr_t r;
+ uint32_t *cigar32 = 0;
bwa_cigar_t *cigar = 0;
ubyte_t *ref_seq;
bwtint_t k, x, y, l;
- int path_len, ret, subo;
- AlnParam ap = aln_param_bwa;
- path_t *path, *p;
+ int xtra;
+ int8_t mat[25];
+ bwa_fill_scmat(1, 3, mat);
// check whether there are too many N's
if (reglen < SW_MIN_MATCH_LEN || (int64_t)l_pac - *beg < len) return 0;
for (k = 0, x = 0; k < len; ++k)
@@ -417,15 +418,19 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
ref_seq = (ubyte_t*)calloc(reglen, 1);
for (k = *beg, l = 0; l < reglen && k < l_pac; ++k)
ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3;
- path = (path_t*)calloc(l+len, sizeof(path_t));
// do alignment
- ret = aln_local_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len, 1, &subo);
- if (ret < 0 || subo == ret) { // no hit or tandem hits
- free(path); free(cigar); free(ref_seq); *n_cigar = 0;
+ xtra = KSW_XSUBO | KSW_XSTART | (len < 250? KSW_XBYTE : 0);
+ r = ksw_align(len, (uint8_t*)seq, l, ref_seq, 5, mat, 5, 1, xtra, 0);
+ ksw_global(r.qe - r.qb + 1, &seq[r.qb], r.te - r.tb + 1, &ref_seq[r.tb], 5, mat, 5, 1, 50, n_cigar, &cigar32);
+ cigar = (bwa_cigar_t*)cigar32;
+ for (k = 0; k < *n_cigar; ++k)
+ cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));
+
+ if (r.score < SW_MIN_MATCH_LEN || r.score2 == r.score) { // poor hit or tandem hits
+ free(cigar); free(ref_seq); *n_cigar = 0;
return 0;
}
- cigar = bwa_aln_path2cigar(path, path_len, n_cigar);
// check whether the alignment is good enough
for (k = 0, x = y = 0; k < *n_cigar; ++k) {
@@ -435,17 +440,14 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
else y += __cigar_len(c);
}
if (x < SW_MIN_MATCH_LEN || y < SW_MIN_MATCH_LEN) { // not good enough
- free(path); free(cigar); free(ref_seq);
+ free(cigar); free(ref_seq);
*n_cigar = 0;
return 0;
}
{ // update cigar and coordinate;
- int start, end;
- p = path + path_len - 1;
- *beg += (p->i? p->i : 1) - 1;
- start = (p->j? p->j : 1) - 1;
- end = path->j;
+ int start = r.qb, end = r.qe + 1;
+ *beg += r.tb;
cigar = (bwa_cigar_t*)realloc(cigar, sizeof(bwa_cigar_t) * (*n_cigar + 2));
if (start) {
memmove(cigar + 1, cigar, sizeof(bwa_cigar_t) * (*n_cigar));
@@ -462,8 +464,7 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
{ // set *cnt
int n_mm, n_gapo, n_gape;
n_mm = n_gapo = n_gape = 0;
- p = path + path_len - 1;
- x = p->i? p->i - 1 : 0; y = p->j? p->j - 1 : 0;
+ x = r.tb; y = r.qb;
for (k = 0; k < *n_cigar; ++k) {
bwa_cigar_t c = cigar[k];
if (__cigar_op(c) == FROM_M) {
@@ -479,7 +480,7 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
*_cnt = (uint32_t)n_mm<<16 | n_gapo<<8 | n_gape;
}
- free(ref_seq); free(path);
+ free(ref_seq);
return cigar;
}
View
@@ -4,7 +4,6 @@
#include <stdlib.h>
#include <math.h>
#include <time.h>
-#include "stdaln.h"
#include "bwase.h"
#include "bwtaln.h"
#include "bntseq.h"
@@ -205,8 +204,8 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
if (__cigar_op(cigar[*n_cigar-1]) == FROM_D) --(*n_cigar); // deletion at the 3'-end
// change "I" at either end of the read to S. just in case. This should rarely happen...
- if (__cigar_op(cigar[*n_cigar-1]) == FROM_I) cigar[*n_cigar-1] = __cigar_create(3, (__cigar_len(cigar[*n_cigar-1])));
- if (__cigar_op(cigar[0]) == FROM_I) cigar[0] = __cigar_create(3, (__cigar_len(cigar[0])));
+ if (__cigar_op(cigar[*n_cigar-1]) == FROM_I) cigar[*n_cigar-1] = __cigar_create(FROM_S, (__cigar_len(cigar[*n_cigar-1])));
+ if (__cigar_op(cigar[0]) == FROM_I) cigar[0] = __cigar_create(FROM_S, (__cigar_len(cigar[0])));
*_pos = (bwtint_t)__pos;
free(ref_seq);
@@ -589,5 +588,6 @@ int bwa_sai2sam_se(int argc, char *argv[])
return 0;
}
bwa_sai2sam_se_core(prefix, argv[optind+1], argv[optind+2], n_occ, rg_line);
+ free(prefix);
return 0;
}
View
@@ -312,18 +312,3 @@ int bwa_aln(int argc, char *argv[])
free(opt); free(prefix);
return 0;
}
-
-/* rgoya: Temporary clone of aln_path2cigar to accomodate for bwa_cigar_t,
-__cigar_op and __cigar_len while keeping stdaln stand alone */
-bwa_cigar_t *bwa_aln_path2cigar(const path_t *path, int path_len, int *n_cigar)
-{
- uint32_t *cigar32;
- bwa_cigar_t *cigar;
- int i;
- cigar32 = aln_path2cigar32((path_t*) path, path_len, n_cigar);
- cigar = (bwa_cigar_t*)cigar32;
- for (i = 0; i < *n_cigar; ++i)
- cigar[i] = __cigar_create( (cigar32[i]&0xf), (cigar32[i]>>4) );
- return cigar;
-}
-
View
@@ -28,6 +28,11 @@
#define bns_pac(pac, k) ((pac)[(k)>>2] >> ((~(k)&3)<<1) & 3)
#endif
+#define FROM_M 0
+#define FROM_I 1
+#define FROM_D 2
+#define FROM_S 3
+
typedef struct {
bwtint_t w;
int bid;
@@ -138,13 +143,6 @@ extern "C" {
void bwa_cs2nt_core(bwa_seq_t *p, bwtint_t l_pac, ubyte_t *pac);
-
- /* rgoya: Temporary clone of aln_path2cigar to accomodate for bwa_cigar_t,
- __cigar_op and __cigar_len while keeping stdaln stand alone */
-#include "stdaln.h"
-
- bwa_cigar_t *bwa_aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
-
#ifdef __cplusplus
}
#endif
View
@@ -4,7 +4,7 @@
#include "utils.h"
#ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.7.0-r324-beta"
+#define PACKAGE_VERSION "0.7.0-r329-beta"
#endif
static int usage()
Oops, something went wrong. Retry.

0 comments on commit 98f8966

Please sign in to comment.