Skip to content

Commit

Permalink
implemented in exts; testing is the next
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Apr 28, 2019
1 parent cdc730d commit be171aa
Show file tree
Hide file tree
Showing 8 changed files with 37 additions and 20 deletions.
22 changes: 14 additions & 8 deletions align.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) //
}
}

static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const int8_t *mat, int w, int end_bonus, int zdrop, int flag, ksw_extz_t *ez)
static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const uint8_t *junc, const int8_t *mat, int w, int end_bonus, int zdrop, int flag, ksw_extz_t *ez)
{
if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) {
int i;
Expand All @@ -305,7 +305,7 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
ksw_reset_extz(ez);
ez->zdropped = 1;
} else if (opt->flag & MM_F_SPLICE)
ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, flag, ez);
ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, opt->junc_bonus, flag, junc, ez);
else if (opt->q == opt->q2 && opt->e == opt->e2)
ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, zdrop, end_bonus, flag, ez);
else
Expand Down Expand Up @@ -547,7 +547,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
{
int is_sr = !!(opt->flag & MM_F_SR), is_splice = !!(opt->flag & MM_F_SPLICE);
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63, as1, cnt1;
uint8_t *tseq, *qseq;
uint8_t *tseq, *qseq, *junc;
int32_t i, l, bw, dropped = 0, extra_flag = 0, rs0, re0, qs0, qe0;
int32_t rs, re, qs, qe;
int32_t rs1, qs1, re1, qe1;
Expand Down Expand Up @@ -666,13 +666,16 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int

assert(re0 > rs0);
tseq = (uint8_t*)kmalloc(km, re0 - rs0);
junc = (uint8_t*)kmalloc(km, re0 - rs0);

if (qs > 0 && rs > 0) { // left extension; probably the condition can be changed to "qs > qs0 && rs > rs0"
qseq = &qseq0[rev][qs0];
mm_idx_getseq(mi, rid, rs0, rs, tseq);
mm_idx_bed_junc(mi, rid, rs0, rs, junc);
mm_seq_rev(qs - qs0, qseq);
mm_seq_rev(rs - rs0, tseq);
mm_align_pair(km, opt, qs - qs0, qseq, rs - rs0, tseq, mat, bw, opt->end_bonus, r->split_inv? opt->zdrop_inv : opt->zdrop, extra_flag|KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez);
mm_seq_rev(rs - rs0, junc);
mm_align_pair(km, opt, qs - qs0, qseq, rs - rs0, tseq, junc, mat, bw, opt->end_bonus, r->split_inv? opt->zdrop_inv : opt->zdrop, extra_flag|KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez);
if (ez->n_cigar > 0) {
mm_append_cigar(r, ez->n_cigar, ez->cigar);
r->p->dp_score += ez->max;
Expand All @@ -698,6 +701,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
// perform alignment
qseq = &qseq0[rev][qs];
mm_idx_getseq(mi, rid, rs, re, tseq);
mm_idx_bed_junc(mi, rid, rs, re, junc);
if (is_sr) { // perform ungapped alignment
assert(qe - qs == re - rs);
ksw_reset_extz(ez);
Expand All @@ -707,11 +711,11 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
}
ez->cigar = ksw_push_cigar(km, &ez->n_cigar, &ez->m_cigar, ez->cigar, 0, qe - qs);
} else { // perform normal gapped alignment
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, mat, bw1, -1, opt->zdrop, extra_flag|KSW_EZ_APPROX_MAX, ez); // first pass: with approximate Z-drop
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, opt->zdrop, extra_flag|KSW_EZ_APPROX_MAX, ez); // first pass: with approximate Z-drop
}
// test Z-drop and inversion Z-drop
if ((zdrop_code = mm_test_zdrop(km, opt, qseq, tseq, ez->n_cigar, ez->cigar, mat)) != 0)
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, mat, bw1, -1, zdrop_code == 2? opt->zdrop_inv : opt->zdrop, extra_flag, ez); // second pass: lift approximate
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, zdrop_code == 2? opt->zdrop_inv : opt->zdrop, extra_flag, ez); // second pass: lift approximate
// update CIGAR
if (ez->n_cigar > 0)
mm_append_cigar(r, ez->n_cigar, ez->cigar);
Expand All @@ -737,7 +741,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
if (!dropped && qe < qe0 && re < re0) { // right extension
qseq = &qseq0[rev][qe];
mm_idx_getseq(mi, rid, re, re0, tseq);
mm_align_pair(km, opt, qe0 - qe, qseq, re0 - re, tseq, mat, bw, opt->end_bonus, opt->zdrop, extra_flag|KSW_EZ_EXTZ_ONLY, ez);
mm_idx_bed_junc(mi, rid, re, re0, junc);
mm_align_pair(km, opt, qe0 - qe, qseq, re0 - re, tseq, junc, mat, bw, opt->end_bonus, opt->zdrop, extra_flag|KSW_EZ_EXTZ_ONLY, ez);
if (ez->n_cigar > 0) {
mm_append_cigar(r, ez->n_cigar, ez->cigar);
r->p->dp_score += ez->max;
Expand All @@ -760,6 +765,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
}

kfree(km, tseq);
kfree(km, junc);
}

static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], const mm_reg1_t *r1, const mm_reg1_t *r2, mm_reg1_t *r_inv, ksw_extz_t *ez)
Expand Down Expand Up @@ -793,7 +799,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
mm_seq_rev(tl, tseq);
if (score < opt->min_dp_max) goto end_align1_inv;
q_off = ql - (q_off + 1), t_off = tl - (t_off + 1);
mm_align_pair(km, opt, ql - q_off, qseq + q_off, tl - t_off, tseq + t_off, mat, (int)(opt->bw * 1.5), -1, opt->zdrop, KSW_EZ_EXTZ_ONLY, ez);
mm_align_pair(km, opt, ql - q_off, qseq + q_off, tl - t_off, tseq + t_off, 0, mat, (int)(opt->bw * 1.5), -1, opt->zdrop, KSW_EZ_EXTZ_ONLY, ez);
if (ez->n_cigar == 0) goto end_align1_inv; // should never be here
mm_append_cigar(r_inv, ez->n_cigar, ez->cigar);
r_inv->p->dp_score = ez->max;
Expand Down
4 changes: 2 additions & 2 deletions index.c
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ int mm_idx_bed_read(mm_idx_t *mi, const char *fn)
return mi->I? 0 : -1;
}

int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, int32_t strand, int8_t *s)
int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, uint8_t *s)
{
int32_t i, left, right;
mm_idx_intv_t *r;
Expand All @@ -682,7 +682,7 @@ int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, int
else left = mid + 1;
}
for (i = left; i < r->n; ++i) {
if (st <= r->a[i].st && en >= r->a[i].en && r->a[i].strand != 0 && strand * r->a[i].strand >= 0) {
if (st <= r->a[i].st && en >= r->a[i].en && r->a[i].strand != 0) {
if (r->a[i].strand > 0) {
s[r->a[i].st] |= 1, s[r->a[i].en - 1] |= 2;
} else {
Expand Down
2 changes: 1 addition & 1 deletion ksw2.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);

void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t gapo, int8_t gape, int8_t gapo2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez);
int8_t gapo, int8_t gape, int8_t gapo2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);

void ksw_extf2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez);

Expand Down
10 changes: 5 additions & 5 deletions ksw2_dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,17 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
}

void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
{
extern void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez);
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
extern void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez);
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
if (ksw_simd < 0) ksw_simd = x86_simd();
if (ksw_simd & SIMD_SSE4_1)
ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez);
ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, junc_bonus, flag, junc, ez);
else if (ksw_simd & SIMD_SSE2)
ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez);
ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, junc_bonus, flag, junc, ez);
else abort();
}
#endif
14 changes: 11 additions & 3 deletions ksw2_exts2_sse.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
#ifdef KSW_CPU_DISPATCH
#ifdef __SSE4_1__
void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
#else
void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
#endif
#else
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
#endif // ~KSW_CPU_DISPATCH
{
#define __dp_code_block1 \
Expand Down Expand Up @@ -120,6 +120,10 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
if (can_type && (target[t+3] == 0 || target[t+3] == 2)) can_type = 2;
if (can_type) ((int8_t*)donor)[t] = can_type == 2? 0 : semi_cost;
}
if (junc)
for (t = 0; t < tlen - 1; ++t)
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&2)))
((int8_t*)donor)[t] += junc_bonus;
memset(acceptor, -noncan, tlen_ * 16);
for (t = 2; t < tlen; ++t) {
int can_type = 0;
Expand All @@ -128,6 +132,10 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
if (can_type && (target[t-2] == 1 || target[t-2] == 3)) can_type = 2;
if (can_type) ((int8_t*)acceptor)[t] = can_type == 2? 0 : semi_cost;
}
if (junc)
for (t = 0; t < tlen; ++t)
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&1)))
((int8_t*)acceptor)[t] += junc_bonus;
}

for (r = 0, last_st = last_en = -1; r < qlen + tlen - 1; ++r) {
Expand Down
3 changes: 2 additions & 1 deletion minimap.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ typedef struct {
int a, b, q, e, q2, e2; // matching score, mismatch, gap-open and gap-ext penalties
int sc_ambi; // score when one or both bases are "N"
int noncan; // cost of non-canonical splicing sites
int junc_bonus;
int zdrop, zdrop_inv; // break alignment if alignment score drops too fast along the diagonal
int end_bonus;
int min_dp_max; // drop an alignment if the score of the max scoring segment is below this threshold
Expand Down Expand Up @@ -367,7 +368,7 @@ int mm_idx_name2id(const mm_idx_t *mi, const char *name);
int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);

int mm_idx_bed_read(mm_idx_t *mi, const char *fn);
int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, int32_t strand, int8_t *s);
int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, uint8_t *s);

// deprecated APIs for backward compatibility
void mm_mapopt_init(mm_mapopt_t *opt);
Expand Down
1 change: 1 addition & 0 deletions options.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
mo->max_gap = 2000, mo->max_gap_ref = mo->bw = 200000;
mo->a = 1, mo->b = 2, mo->q = 2, mo->e = 1, mo->q2 = 32, mo->e2 = 0;
mo->noncan = 9;
mo->junc_bonus = 9;
mo->zdrop = 200, mo->zdrop_inv = 100; // because mo->a is halved
} else return -1;
return 0;
Expand Down
1 change: 1 addition & 0 deletions python/cmappy.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ cdef extern from "minimap.h":
int a, b, q, e, q2, e2
int sc_ambi
int noncan
int junc_bonus
int zdrop, zdrop_inv
int end_bonus
int min_dp_max
Expand Down

0 comments on commit be171aa

Please sign in to comment.