Skip to content

Commit

Permalink
Fixed nuances between gzip vs bgzf compressed files.
Browse files Browse the repository at this point in the history
Credit to OSS-Fuzz
Fixes oss-fuzz 20473

This also fixes the indexing tests forbidding samtools index on
gzipped or naked SAM and BAMs.  Previously the index command did not
fail, but gave broken indices.

Ironically with 1.10 samtools index on a totally raw uncompressed BAM
did actually start working (but not gzipped BAM or raw/gzipped SAM).
This was not by design and the indices produced couldn't be used by
1.9 and prior so it's safest to forbid this case too.

Also added documentation on the ambiguous "is_bgzf" field given this
is set for BAM files, even if ungzipped.  It's purely a flag on usage
of the bgzf_open/read functions, which can read uncompressed data, and
has nothing to do with the file format actually being BGZF.
  • Loading branch information
jkbonfield committed Feb 3, 2020
1 parent ecf0216 commit 39acb95
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
3 changes: 3 additions & 0 deletions htslib/hts.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,9 @@ typedef struct __hts_idx_t hts_idx_t;
// - is_write and is_cram are used directly in samtools <= 1.1
// - fp is used directly in samtools (up to and including current develop)
// - line is used directly in bcftools (up to and including current develop)
// - is_bgzf and is_cram flags indicate which fp union member to use.
// Note is_bgzf being set does not indicate the flag is BGZF compressed,
// nor even whether it is compressed at all (eg on naked BAMs).
typedef struct {
uint32_t is_bin:1, is_write:1, is_be:1, is_cram:1, is_bgzf:1, dummy:27;
int64_t lineno;
Expand Down
16 changes: 8 additions & 8 deletions sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,7 @@ int sam_index_build3(const char *fn, const char *fnidx, int min_shift, int nthre

case bam:
case sam:
if (!fp->is_bgzf) {
if (fp->format.compression != bgzf) {
hts_log_error("%s file \"%s\" not BGZF compressed",
fp->format.format == bam ? "BAM" : "SAM", fn);
ret = -1;
Expand Down Expand Up @@ -1402,7 +1402,7 @@ static sam_hdr_t *sam_hdr_create(htsFile* fp) {
if (kputc('\n', &str) < 0)
goto error;

if (fp->format.compression == bgzf) {
if (fp->is_bgzf) {
next_c = bgzf_peek(fp->fp.bgzf);
} else {
unsigned char nc;
Expand Down Expand Up @@ -1620,7 +1620,7 @@ int sam_hdr_write(htsFile *fp, const sam_hdr_t *h)
l_text = h->l_text;
}

if (fp->format.compression == bgzf) {
if (fp->is_bgzf) {
bytes = bgzf_write(fp->fp.bgzf, text, l_text);
} else {
bytes = hwrite(fp->fp.hfile, text, l_text);
Expand All @@ -1641,7 +1641,7 @@ int sam_hdr_write(htsFile *fp, const sam_hdr_t *h)
if (r != 0)
return -1;

if (fp->format.compression == bgzf) {
if (fp->is_bgzf) {
bytes = bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l);
} else {
bytes = hwrite(fp->fp.hfile, fp->line.s, fp->line.l);
Expand All @@ -1650,7 +1650,7 @@ int sam_hdr_write(htsFile *fp, const sam_hdr_t *h)
return -1;
}
}
if (fp->format.compression == bgzf) {
if (fp->is_bgzf) {
if (bgzf_flush(fp->fp.bgzf) != 0) return -1;
} else {
if (hflush(fp->fp.hfile) != 0) return -1;
Expand Down Expand Up @@ -2692,7 +2692,7 @@ static void *sam_dispatcher_write(void *vp) {
if (i < gl->data_size)
i++;

if (fp->format.compression == bgzf) {
if (fp->is_bgzf) {
if (bgzf_write(fp->fp.bgzf, &gl->data[j], i-j) != i-j)
goto err;
} else {
Expand Down Expand Up @@ -2731,7 +2731,7 @@ static void *sam_dispatcher_write(void *vp) {
gl->bams = NULL;
pthread_mutex_unlock(&fd->lines_m);
} else {
if (fp->format.compression == bgzf) {
if (fp->is_bgzf) {
if (bgzf_write(fp->fp.bgzf, gl->data, gl->data_size) != gl->data_size)
goto err;
} else {
Expand Down Expand Up @@ -3284,7 +3284,7 @@ int sam_write1(htsFile *fp, const sam_hdr_t *h, const bam1_t *b)
} else {
if (sam_format1(h, b, &fp->line) < 0) return -1;
kputc('\n', &fp->line);
if (fp->format.compression == bgzf) {
if (fp->is_bgzf) {
if ( bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l) != fp->line.l ) return -1;
} else {
if ( hwrite(fp->fp.hfile, fp->line.s, fp->line.l) != fp->line.l ) return -1;
Expand Down

0 comments on commit 39acb95

Please sign in to comment.