Permalink
Browse files

fixed bug where sliding window didn't use last window, fixed bug wher…

…e quality index went beyond end of quality string in corner cases, added error checking for base quality outside of expected range
  • Loading branch information...
1 parent 09febb6 commit 032bf41f962ffeacca9a21ba222b6238760bc1cf @najoshi committed Mar 9, 2012
Showing with 34 additions and 14 deletions.
  1. +1 −1 Makefile
  2. +10 −3 src/sickle.h
  3. +23 −8 src/sliding.c
  4. +0 −1 src/trim_paired.c
  5. +0 −1 src/trim_single.c
View
@@ -1,5 +1,5 @@
PROGRAM_NAME = sickle
-VERSION = 1.1
+VERSION = 1.2
CC = gcc
CFLAGS = -Wall -pedantic -DVERSION=$(VERSION)
DEBUG = -g
View
@@ -68,16 +68,23 @@ typedef enum {
ILLUMINA
} quality_type;
+static const char typenames[4][10] = {
+ {"Phred"},
+ {"Sanger"},
+ {"Solexa"},
+ {"Illumina"}
+};
+
#define Q_OFFSET 0
#define Q_MIN 1
#define Q_MAX 2
static const int quality_constants[4][3] = {
/* offset, min, max */
{0, 4, 60}, /* PHRED */
- {33, 0, 93}, /* SANGER */
- {64, -5, 62}, /* SOLEXA; this is an approx; the transform is non-linear */
- {64, 0, 62} /* ILLUMINA */
+ {33, 33, 80}, /* SANGER */
+ {64, 58, 112}, /* SOLEXA; this is an approx; the transform is non-linear */
+ {64, 64, 110} /* ILLUMINA */
};
typedef struct __cutsites_ {
View
@@ -7,15 +7,28 @@
#include "sickle.h"
#include "kseq.h"
-int get_quality_num (char qualchar, int qualtype) {
+int get_quality_num (char qualchar, int qualtype, kseq_t *fqrec, int pos) {
/*
Return the adjusted quality, depending on quality type.
Note that this uses the array in sickle.h, which *approximates*
the SOLEXA (pre-1.3 pipeline) qualities as linear. This is
inaccurate with low-quality bases.
*/
- return((int) qualchar - quality_constants[qualtype][Q_OFFSET]);
+
+ int qual_value = (int) qualchar;
+
+ if (qual_value < quality_constants[qualtype][Q_MIN] || qual_value > quality_constants[qualtype][Q_MAX]) {
+ fprintf (stderr, "ERROR: Quality value (%d) does not fall within correct range for %s encoding.\n", qual_value, typenames[qualtype]);
+ fprintf (stderr, "Range for %s encoding: %d-%d\n", typenames[qualtype], quality_constants[qualtype][Q_MIN], quality_constants[qualtype][Q_MAX]);
+ fprintf (stderr, "FastQ record: %s\n", fqrec->name.s);
+ fprintf (stderr, "Quality string: %s\n", fqrec->qual.s);
+ fprintf (stderr, "Quality char: '%c'\n", qualchar);
+ fprintf (stderr, "Quality position: %d\n", pos+1);
+ exit(1);
+ }
+
+ return (qual_value - quality_constants[qualtype][Q_OFFSET]);
}
@@ -46,10 +59,10 @@ cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int
if (window_size == 0) window_size = fqrec->seq.l;
for (i=0; i<window_size; i++) {
- window_total += get_quality_num (fqrec->qual.s[i], qualtype);
+ window_total += get_quality_num (fqrec->qual.s[i], qualtype, fqrec, i);
}
- for (i=0; i<fqrec->qual.l; i++) {
+ for (i=0; i <= fqrec->qual.l - window_size; i++) {
window_avg = (double)window_total / (double)window_size;
@@ -59,7 +72,7 @@ cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int
/* at what point in the window does the quality go above the threshold? */
for (j=window_start; j<window_start+window_size; j++) {
- if (get_quality_num (fqrec->qual.s[j], qualtype) >= qual_threshold) {
+ if (get_quality_num (fqrec->qual.s[j], qualtype, fqrec, j) >= qual_threshold) {
five_prime_cut = j;
break;
}
@@ -76,7 +89,7 @@ cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int
/* at what point in the window does the quality dip below the threshold? */
for (j=window_start; j<window_start+window_size; j++) {
- if (get_quality_num (fqrec->qual.s[j], qualtype) < qual_threshold) {
+ if (get_quality_num (fqrec->qual.s[j], qualtype, fqrec, j) < qual_threshold) {
three_prime_cut = j;
/* if cutting length is less than threshold then return -1 for both */
@@ -93,8 +106,10 @@ cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int
}
/* instead of sliding the window, subtract the first qual and add the next qual */
- window_total -= get_quality_num (fqrec->qual.s[window_start], qualtype);
- window_total += get_quality_num (fqrec->qual.s[window_start+window_size], qualtype);
+ window_total -= get_quality_num (fqrec->qual.s[window_start], qualtype, fqrec, window_start);
+ if (window_start+window_size < fqrec->qual.l) {
+ window_total += get_quality_num (fqrec->qual.s[window_start+window_size], qualtype, fqrec, window_start+window_size);
+ }
window_start++;
}
View
@@ -108,7 +108,6 @@ int paired_main (int argc, char *argv[]) {
case 't':
if (!strcmp (optarg, "illumina")) qualtype = ILLUMINA;
- else if (!strcmp (optarg, "phred")) qualtype = PHRED;
else if (!strcmp (optarg, "solexa")) qualtype = SOLEXA;
else if (!strcmp (optarg, "sanger")) qualtype = SANGER;
else {
View
@@ -83,7 +83,6 @@ int single_main (int argc, char *argv[]) {
case 't':
if (!strcmp (optarg, "illumina")) qualtype = ILLUMINA;
- else if (!strcmp (optarg, "phred")) qualtype = PHRED;
else if (!strcmp (optarg, "solexa")) qualtype = SOLEXA;
else if (!strcmp (optarg, "sanger")) qualtype = SANGER;
else {

0 comments on commit 032bf41

Please sign in to comment.