Skip to content
Browse files

Merge branch 'master' into api

  • Loading branch information...
2 parents 9c486fa + cd81868 commit 72f1a954483d508b0c16777c7cd5b91669d59886 @lh3 committed
Showing with 18 additions and 10 deletions.
  1. +3 −2 bwtsw2.h
  2. +2 −1 bwtsw2_aux.c
  3. +6 −2 bwtsw2_main.c
  4. +7 −5 bwtsw2_pair.c
View
5 bwtsw2.h
@@ -12,8 +12,9 @@
#define BSW2_FLAG_RESCUED 0x800
typedef struct {
- int a, b, q, r, t, qr, bw;
- int z, is, t_seeds, hard_clip, multi_2nd;
+ int skip_sw:16, hard_clip:16;
+ int a, b, q, r, t, qr, bw, max_ins;
+ int z, is, t_seeds, multi_2nd;
float mask_level, coef;
int n_threads, chunk_size;
} bsw2opt_t;
View
3 bwtsw2_aux.c
@@ -50,7 +50,8 @@ bsw2opt_t *bsw2_init_opt()
bsw2opt_t *o = (bsw2opt_t*)calloc(1, sizeof(bsw2opt_t));
o->a = 1; o->b = 3; o->q = 5; o->r = 2; o->t = 30;
o->bw = 50;
- o->z = 1; o->is = 3; o->t_seeds = 5; o->hard_clip = 0;
+ o->max_ins = 20000;
+ o->z = 1; o->is = 3; o->t_seeds = 5; o->hard_clip = 0; o->skip_sw = 0;
o->mask_level = 0.50f; o->coef = 5.5f;
o->qr = o->q + o->r; o->n_threads = 1; o->chunk_size = 10000000;
return o;
View
8 bwtsw2_main.c
@@ -18,7 +18,7 @@ int bwa_bwtsw2(int argc, char *argv[])
opt = bsw2_init_opt();
srand48(11);
- while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:M")) >= 0) {
+ while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:S")) >= 0) {
switch (c) {
case 'q': opt->q = atoi(optarg); break;
case 'r': opt->r = atoi(optarg); break;
@@ -35,6 +35,8 @@ int bwa_bwtsw2(int argc, char *argv[])
case 'M': opt->multi_2nd = 1; break;
case 'H': opt->hard_clip = 1; break;
case 'f': xreopen(optarg, "w", stdout); break;
+ case 'I': opt->max_ins = atoi(optarg); break;
+ case 'S': opt->skip_sw = 1; break;
}
}
opt->qr = opt->q + opt->r;
@@ -50,9 +52,11 @@ int bwa_bwtsw2(int argc, char *argv[])
fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level);
fprintf(stderr, "\n");
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
- fprintf(stderr, " -f FILE file to output results to instead of stdout\n");
+ fprintf(stderr, " -f FILE file to output results to instead of stdout\n");
fprintf(stderr, " -H in SAM output, use hard clipping instead of soft clipping\n");
fprintf(stderr, " -M mark multi-part alignments as secondary\n");
+ fprintf(stderr, " -S skip Smith-Waterman read pairing\n");
+ fprintf(stderr, " -I INT ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins);
fprintf(stderr, "\n");
fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t);
fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
View
12 bwtsw2_pair.c
@@ -12,7 +12,6 @@
#include "stdaln.h"
#endif
-#define MAX_INS 20000
#define MIN_RATIO 0.8
#define OUTLIER_BOUND 2.0
#define MAX_STDDEV 4.0
@@ -23,7 +22,7 @@ typedef struct {
double avg, std;
} bsw2pestat_t;
-bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg)
+bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg, int max_ins)
{
extern void ks_introsort_uint64_t(size_t n, uint64_t *a);
int i, k, x, p25, p50, p75, tmp, max_len = 0;
@@ -40,6 +39,7 @@ bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg)
if (t[0]->G2 > 0.8 * t[0]->G) continue; // the best hit is not good enough
if (t[1]->G2 > 0.8 * t[1]->G) continue; // the best hit is not good enough
l = t[0]->k > t[1]->k? t[0]->k - t[1]->k + t[1]->len : t[1]->k - t[0]->k + t[0]->len;
+ if (l >= max_ins) continue; // skip pairs with excessively large insert
max_len = max_len > t[0]->end - t[0]->beg? max_len : t[0]->end - t[0]->beg;
max_len = max_len > t[1]->end - t[1]->beg? max_len : t[1]->end - t[1]->beg;
isize[k++] = l;
@@ -186,7 +186,7 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b
int8_t g_mat[25];
kstring_t msg;
memset(&msg, 0, sizeof(kstring_t));
- pes = bsw2_stat(n, hits, &msg);
+ pes = bsw2_stat(n, hits, &msg, opt->max_ins);
for (i = k = 0; i < 5; ++i) {
for (j = 0; j < 4; ++j)
g_mat[k++] = i == j? opt->a : -opt->b;
@@ -207,8 +207,10 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b
if (hits[i] == 0 || hits[i+1] == 0) continue; // one end has excessive N
if (hits[i]->n != 1 && hits[i+1]->n != 1) continue; // no end has exactly one hit
if (hits[i]->n > 1 || hits[i+1]->n > 1) continue; // one read has more than one hit
- if (hits[i+0]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+0]->hits[0], seq[i+1].l, seq[i+1].seq, &a[1], g_mat);
- if (hits[i+1]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+1]->hits[0], seq[i+0].l, seq[i+0].seq, &a[0], g_mat);
+ if (!opt->skip_sw) {
+ if (hits[i+0]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+0]->hits[0], seq[i+1].l, seq[i+1].seq, &a[1], g_mat);
+ if (hits[i+1]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+1]->hits[0], seq[i+0].l, seq[i+0].seq, &a[0], g_mat);
+ } // else a[0].G == a[1].G == a[0].G2 == a[1].G2 == 0
// the following enumerate all possibilities. It is tedious but necessary...
if (hits[i]->n + hits[i+1]->n == 1) { // one end mapped; the other not;
bwtsw2_t *p[2];

0 comments on commit 72f1a95

Please sign in to comment.
Something went wrong with that request. Please try again.