Skip to content

Commit

Permalink
Allow fai index to be in a different location to the indexed file.
Browse files Browse the repository at this point in the history
Convert bgzf_index_load, bgzf_index_dump, fai_load, fai_build, fai_read and
fai_save to use hfile instead of stdio.  This allows access to remote
indexes via http, ftp etc. and the plugin infrastructure.

Add new API interfaces fai_build3() and fai_load3() which take separate
names for the fai and gzi index files.  If an index file name is
passed in as NULL, it is derived from the name of the file being indexed
as with fai_build() and fai_load().  As a result, fai_build() and
fai_load() are replaced by simple wrappers that call fai_build3() and
fai_load3() with NULL index file names.

The download_and_open() function which made local copies of remote index
files is removed.  The side effect of creating local files was not
desirable in some cases, and download_and_open() suffered from race
conditions if two processes tried to access the same index simultaneously.
It was also not called for .gzi files.  fai_build3() and fai_load3() can
directly access remote files for the indexed file itself as well as for
its .fai and .gzi index files.

This removes fai_save() as a public symbol in libhts.so, but this
function does not appear in a public header file so is not part of
the official HTSlib API or ABI.
  • Loading branch information
daviesrob authored and jkbonfield committed Feb 8, 2017
1 parent ff71a31 commit 3dc96c5
Show file tree
Hide file tree
Showing 5 changed files with 257 additions and 138 deletions.
4 changes: 4 additions & 0 deletions NEWS
Expand Up @@ -10,6 +10,7 @@ Noteworthy changes in release 1.4
- auxiliary fields in bam1_t are now always stored in little-endian byte
order (previously this depended on whether you read a bam, sam or cram file)
- bam_aux2i() now returns an int64_t value
- fai_load() will no longer save local copies of remote fasta indexes

* New errmod_cal(), probaln_glocal(), sam_cap_mapq(), and sam_prob_realn()
functions, previously internal to SAMtools, have been added to HTSlib.
Expand Down Expand Up @@ -47,6 +48,9 @@ Noteworthy changes in release 1.4
the bam_aux2 and bam_auxB2 functions will set errno if the requested
conversion is not valid.

* New functions fai_load3() and fai_build3() allow fasta indexes to be
stored in a different location to the indexed fasta file.

Noteworthy changes in release 1.3.2 (13 September 2016)

* Corrected bin calculation when converting directly from CRAM to BAM.
Expand Down
37 changes: 20 additions & 17 deletions bgzf.c
Expand Up @@ -1694,10 +1694,10 @@ int bgzf_index_add_block(BGZF *fp)
return 0;
}

static inline int fwrite_uint64(uint64_t x, FILE *f)
static inline int hwrite_uint64(uint64_t x, hFILE *f)
{
if (ed_is_big()) x = ed_swap_8(x);
if (fwrite(&x, sizeof x, 1, f) != 1) return -1;
if (hwrite(f, &x, sizeof(x)) != sizeof(x)) return -1;
return 0;
}

Expand All @@ -1717,12 +1717,12 @@ int bgzf_index_dump(BGZF *fp, const char *bname, const char *suffix)
memcpy(tmp+blen,suffix,slen+1);
}

FILE *idx = fopen(tmp?tmp:bname,"wb");
hFILE *idx = hopen(tmp?tmp:bname,"wb");
if ( tmp ) free(tmp);
if ( !idx ) {
if (hts_verbose > 1)
{
fprintf(stderr, "[E::%s] Error opening %s%s : %s\n",
fprintf(stderr, "[E::%s] Error opening %s%s for writing: %s\n",
__func__, bname, suffix ? suffix : "", strerror(errno));
}
return -1;
Expand All @@ -1733,14 +1733,14 @@ int bgzf_index_dump(BGZF *fp, const char *bname, const char *suffix)
// This is not a bug.

int i;
if (fwrite_uint64(fp->idx->noffs - 1, idx) < 0) goto fail;
if (hwrite_uint64(fp->idx->noffs - 1, idx) < 0) goto fail;
for (i=1; i<fp->idx->noffs; i++)
{
if (fwrite_uint64(fp->idx->offs[i].caddr, idx) < 0) goto fail;
if (fwrite_uint64(fp->idx->offs[i].uaddr, idx) < 0) goto fail;
if (hwrite_uint64(fp->idx->offs[i].caddr, idx) < 0) goto fail;
if (hwrite_uint64(fp->idx->offs[i].uaddr, idx) < 0) goto fail;
}

if (fclose(idx) < 0)
if (hclose(idx) < 0)
{
if (hts_verbose > 1)
{
Expand All @@ -1757,13 +1757,13 @@ int bgzf_index_dump(BGZF *fp, const char *bname, const char *suffix)
fprintf(stderr, "[E::%s] Error writing to %s%s : %s\n",
__func__, bname, suffix ? suffix : "", strerror(errno));
}
fclose(idx);
hclose_abruptly(idx);
return -1;
}

static inline int fread_uint64(uint64_t *xptr, FILE *f)
static inline int hread_uint64(uint64_t *xptr, hFILE *f)
{
if (fread(xptr, sizeof *xptr, 1, f) != 1) return -1;
if (hread(f, xptr, sizeof(*xptr)) != sizeof(*xptr)) return -1;
if (ed_is_big()) ed_swap_8p(xptr);
return 0;
}
Expand All @@ -1781,7 +1781,7 @@ int bgzf_index_load(BGZF *fp, const char *bname, const char *suffix)
memcpy(tmp+blen,suffix,slen+1);
}

FILE *idx = fopen(tmp?tmp:bname,"rb");
hFILE *idx = hopen(tmp?tmp:bname,"rb");
if ( tmp ) free(tmp);
if ( !idx ) {
if (hts_verbose > 1) {
Expand All @@ -1794,7 +1794,7 @@ int bgzf_index_load(BGZF *fp, const char *bname, const char *suffix)
fp->idx = (bgzidx_t*) calloc(1,sizeof(bgzidx_t));
if (fp->idx == NULL) goto fail;
uint64_t x;
if (fread_uint64(&x, idx) < 0) goto fail;
if (hread_uint64(&x, idx) < 0) goto fail;

fp->idx->noffs = fp->idx->moffs = x + 1;
fp->idx->offs = (bgzidx1_t*) malloc(fp->idx->moffs*sizeof(bgzidx1_t));
Expand All @@ -1804,11 +1804,14 @@ int bgzf_index_load(BGZF *fp, const char *bname, const char *suffix)
int i;
for (i=1; i<fp->idx->noffs; i++)
{
if (fread_uint64(&fp->idx->offs[i].caddr, idx) < 0) goto fail;
if (fread_uint64(&fp->idx->offs[i].uaddr, idx) < 0) goto fail;
if (hread_uint64(&fp->idx->offs[i].caddr, idx) < 0) goto fail;
if (hread_uint64(&fp->idx->offs[i].uaddr, idx) < 0) goto fail;
}

if (fclose(idx) != 0) goto fail;
if (hclose(idx) != 0) {
idx = NULL;
goto fail;
}
return 0;

fail:
Expand All @@ -1817,7 +1820,7 @@ int bgzf_index_load(BGZF *fp, const char *bname, const char *suffix)
fprintf(stderr, "[E::%s] Error reading %s%s : %s\n",
__func__, bname, suffix ? suffix : "", strerror(errno));
}
fclose(idx);
if (idx) hclose_abruptly(idx);
if (fp->idx) {
free(fp->idx->offs);
free(fp->idx);
Expand Down

0 comments on commit 3dc96c5

Please sign in to comment.