Skip to content

Commit

Permalink
Fix samtools sort -f temporary file names
Browse files Browse the repository at this point in the history
Pull request #27 introduced a bug in which worker threads wrote
temporary files with different names (prefix.NNNN.bam) to those the
merge step tried to read (prefix.NNNN).

Now bam_sort_core_ext() has separate prefix and output filename
parameters, enabling future code allowing the temporary files to be
somewhere entirely separate from the output file (e.g., $TMPDIR).

(With -f, temporary files now have slightly odd names such as
foo.bam.0001.bam, but they're only temporary anyway...
Thanks to Chris Carr for reporting this bug.)
  • Loading branch information
jmarshall committed Aug 16, 2013
1 parent d60c730 commit ccf1da9
Showing 1 changed file with 20 additions and 15 deletions.
35 changes: 20 additions & 15 deletions bam_sort.c
Original file line number Diff line number Diff line change
Expand Up @@ -431,26 +431,22 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
@param is_by_qname whether to sort by query name
@param fn name of the file to be sorted
@param prefix prefix of the output and the temporary files; upon
sucessess, prefix.bam will be written.
@param prefix prefix of the temporary files (prefix.NNNN.bam are written)
@param fnout name of the final output file to be written
@param max_mem approxiate maximum memory (very inaccurate)
@param full_path the given output path is the full path and not just the prefix
@return 0 for successful sorting, negative on errors
@discussion It may create multiple temporary subalignment files
and then merge them by calling bam_merge_core(). This function is
NOT thread safe.
*/
int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t _max_mem, int is_stdout, int n_threads, int level, int full_path)
int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, const char *fnout, size_t _max_mem, int n_threads, int level)
{
int ret, i, n_files = 0;
size_t mem, max_k, k, max_mem;
bam_header_t *header;
bamFile fp;
bam1_t *b, **buf;
char *fnout = 0;
char const *suffix = ".bam";
if (full_path) suffix += 4;

if (n_threads < 2) n_threads = 1;
g_is_by_qname = is_by_qname;
Expand Down Expand Up @@ -490,10 +486,6 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_
}
if (ret != -1)
fprintf(stderr, "[bam_sort_core] truncated file. Continue anyway.\n");
// output file name
fnout = calloc(strlen(prefix) + 20, 1);
if (is_stdout) sprintf(fnout, "-");
else sprintf(fnout, "%s%s", prefix, suffix);
// write the final output
if (n_files == 0) { // a single block
char mode[8];
Expand All @@ -508,7 +500,7 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_
fns = (char**)calloc(n_files, sizeof(char*));
for (i = 0; i < n_files; ++i) {
fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
sprintf(fns[i], "%s.%.4d%s", prefix, i, suffix);
sprintf(fns[i], "%s.%.4d.bam", prefix, i);
}
if (bam_merge_core2(is_by_qname, fnout, 0, n_files, fns, 0, 0, n_threads, level) < 0) {
// Propagate bam_merge_core2() failure; it has already emitted a
Expand All @@ -521,7 +513,6 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_
}
free(fns);
}
free(fnout);
// free
for (k = 0; k < max_k; ++k) {
if (!buf[k]) continue;
Expand All @@ -536,13 +527,19 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_

int bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t max_mem)
{
return bam_sort_core_ext(is_by_qname, fn, prefix, max_mem, 0, 0, -1, 0);
int ret;
char *fnout = calloc(strlen(prefix) + 4 + 1, 1);
sprintf(fnout, "%s.bam", prefix);
ret = bam_sort_core_ext(is_by_qname, fn, prefix, fnout, max_mem, 0, -1);
free(fnout);
return ret;
}

int bam_sort(int argc, char *argv[])
{
size_t max_mem = 768<<20; // 512MB
int c, is_by_qname = 0, is_stdout = 0, ret = 0, n_threads = 0, level = -1, full_path = 0;
char *fnout;
while ((c = getopt(argc, argv, "fnom:@:l:")) >= 0) {
switch (c) {
case 'f': full_path = 1; break;
Expand Down Expand Up @@ -572,6 +569,14 @@ int bam_sort(int argc, char *argv[])
fprintf(stderr, "\n");
return 1;
}
if (bam_sort_core_ext(is_by_qname, argv[optind], argv[optind+1], max_mem, is_stdout, n_threads, level, full_path) < 0) ret = 1;

if (is_stdout) fnout = strdup("-");
else {
fnout = calloc(strlen(argv[optind+1]) + 4 + 1, 1);
sprintf(fnout, "%s%s", argv[optind+1], full_path? "" : ".bam");
}

if (bam_sort_core_ext(is_by_qname, argv[optind], argv[optind+1], fnout, max_mem, n_threads, level) < 0) ret = 1;
free(fnout);
return ret;
}

0 comments on commit ccf1da9

Please sign in to comment.