diff --git a/CHANGES.md b/CHANGES.md index e30b7837..9f02e9e3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,36 @@ # NCBI External Developer Release: +## SRA Toolkit 2.8.2 +**March 6, 2017** + + **blast**: Updated blast library to be able to process runs having empty rows + **blast, build**: removed library dependencies that were preventing users from launching these tools + **blast, sra-tools**: Prepared completely static build of blast tools for windows with HTTPS support + **build**: **bam-load**: changed memcpy to memmove when regions overlap - seems to corrupt data on centos7 + **build**: Added ability to specify ncbi-vdb/configure --with-magic-prefix. Look for libraries in (lib lib64) when running "configure --with-...-prefix" + **build**: configure detects location of ngs libraries + **build**: configure was fixed to skip options unrecognized by gcc 4.4.7 + **build**: created sra-toolkit Debian package + **build**: fixed a bug in 'configure' when it could not find source files in repository saved with non-standard name + **build, ncbi-vdb, sra-tools**: installation will back up old configuration files if they differ from the ones being installed + **cg-load**: added loading of CG File format v2.4 + **kns**: SRA tools respect standard set of environment variables for proxy specification + **kns**: updated mbedtls library to version 2.4.1 + **ncbi-vdb, ngs, ngs-tools, sra-tools**: eliminated memcpy from sources due to potential for overlap + **ngs, sra-search**: now supports search on reference + **ngs-tools**: updated the NCBI download page to incorporate ngs versions into 3rd party package names + **prefetch**: Fixed error message "path excessive while opening directory" when prefetch is trying to get vdbcache + **prefetch**: Fixed regression in prefetch-2.8.1 when downloading dbGaP files via HTTP + **prefetch**: Fixed regression in prefetch-2.8.1 when downloading vdbcache files from dbGaP via HTTP + **sam-dump**: consistency of sam-dump in fastq-mod 
improved + **sam-dump**: consistency of sam-dump in fastq-mode improved + **sra-stat**: sra-stat does XML-escaping when printing spot-groups + **test-sra**: extended test-sra to debug user problems with https connections to NCBI + **test-sra**: test-sra prints the amount of available disk space in user repositories + **vdb-config**: vdb-config correctly works when there is a non-canonical path in configuration + + ## SRA Toolkit 2.8.1-2 **January 19, 2017** diff --git a/build/Makefile.env b/build/Makefile.env index 46cc5f53..9b5f2064 100644 --- a/build/Makefile.env +++ b/build/Makefile.env @@ -200,7 +200,7 @@ endif .PHONY: stdclean removelinks makedirs vers-includes rebuild-dirlinks .PHONY: stdjclean makejdirs -# configuration targets +# configuration targets out: @ echo $(OUTDIR) > $(TOP)/build/OUTDIR.$(BUILD_OS) @ $(MAKE) TOP=$(TOP) -f $(TOP)/build/Makefile.env rebuild-dirlinks @@ -362,11 +362,11 @@ LDFLAGS = $(DBG) $(PROF) $(CARCH) $(MIN_DEPLOY_OS_OPT) $(LDPATHS) #------------------------------------------------------------------------------- # runtests -# +# # MallocScribble=1 is for catching allocation problems on Mac # ifeq ($(RUNTESTS_OVERRIDE),) -runtests: std $(TEST_TOOLS) +runtests: all $(TEST_TOOLS) @ export VDB_CONFIG=$(VDB_CONFIG);export LD_LIBRARY_PATH=$(LIBDIR):$$LD_LIBRARY_PATH;export MallocScribble=1;\ for i in $(TEST_TOOLS);\ do\ @@ -380,11 +380,11 @@ endif #------------------------------------------------------------------------------- # slowtests -# +# # $(SLOWTESTSDATADIR) should be used to create temporary test files SLOWTESTSDATADIR ?= /panfs/pan1.be-md.ncbi.nlm.nih.gov/sra-test/slowtests/$(shell whoami) -slowtests: std $(SLOW_TEST_TOOLS) +slowtests: all $(SLOW_TEST_TOOLS) @ export LD_LIBRARY_PATH=$(LIBDIR):$$LD_LIBRARY_PATH;\ for i in $(SLOW_TEST_TOOLS);\ do\ diff --git a/test/Makefile b/test/Makefile index 9f6464b2..ae67bbea 100644 --- a/test/Makefile +++ b/test/Makefile @@ -35,6 +35,7 @@ include $(TOP)/build/Makefile.config # default # SUBDIRS = 
\ + vdb-config \ fastq-loader \ vcf-loader \ kget \ diff --git a/test/vdb-config/Makefile b/test/vdb-config/Makefile new file mode 100644 index 00000000..7813982a --- /dev/null +++ b/test/vdb-config/Makefile @@ -0,0 +1,35 @@ +# =========================================================================== +# +# PUBLIC DOMAIN NOTICE +# National Center for Biotechnology Information +# +# This software/database is a "United States Government Work" under the +# terms of the United States Copyright Act. It was written as part of +# the author's official duties as a United States Government employee and +# thus cannot be copyrighted. This software/database is freely available +# to the public for use. The National Library of Medicine and the U.S. +# Government have not placed any restriction on its use or reproduction. +# +# Although all reasonable efforts have been taken to ensure the accuracy +# and reliability of the software and data, the NLM and the U.S. +# Government do not and cannot warrant the performance or results that +# may be obtained by using this software or data. The NLM and the U.S. +# Government disclaim all warranties, express or implied, including +# warranties of performance, merchantability or fitness for any particular +# purpose. +# +# Please cite the author in any work or product based on this material. +# +# =========================================================================== + +default: runtests + +TOP ?= $(abspath ../..) +include $(TOP)/build/Makefile.env # BINDIR + +runtests: test-vdb-config + +test-vdb-config: + @ printf "Testing exit code vdb-config of vdb-config... 
" + @ PATH=$(BINDIR):$(PATH) ; ./test-vdb-config.pl + @ echo OK diff --git a/test/vdb-config/test-vdb-config.pl b/test/vdb-config/test-vdb-config.pl new file mode 100755 index 00000000..542a5b02 --- /dev/null +++ b/test/vdb-config/test-vdb-config.pl @@ -0,0 +1,12 @@ +#!/usr/bin/perl -w + +use strict; + +use File::Temp "tempdir"; + +my $tmp = tempdir ( "phgvXXXX", CLEANUP => 1 ); +$ENV{VDB_CONFIG}=$tmp; +$ENV{NCBI_SETTINGS}="$tmp/u.mkfg"; + +`vdb-config -s foo=bar`; +die "vdb-config exited with " . ( $? >> 8 ) if ( $? ); diff --git a/test/vdb-dump/Makefile b/test/vdb-dump/Makefile index 5449f2c5..9bcca8b5 100644 --- a/test/vdb-dump/Makefile +++ b/test/vdb-dump/Makefile @@ -53,6 +53,14 @@ $(ALL_TOOLS): makedirs .PHONY: all std $(ALL_TOOLS) #------------------------------------------------------------------------------- +# all +# +$(TARGDIR)/all: \ + $(addprefix $(BINDIR)/,$(ALL_TOOLS)) + +.PHONY: $(TARGDIR)/all + +#------------------------------------------------------------------------------- # std # $(TARGDIR)/std: \ diff --git a/tools/bam-loader/loader-imp.c b/tools/bam-loader/loader-imp.c index f81ea7ce..dc5cdba1 100644 --- a/tools/bam-loader/loader-imp.c +++ b/tools/bam-loader/loader-imp.c @@ -1548,6 +1548,7 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db, KDataBuffer fragBuf; KDataBuffer cigBuf; rc_t rc; + const BAMRefSeq *refSeq = NULL; int32_t lastRefSeqId = -1; bool wasRenamed = false; size_t rsize; @@ -1650,7 +1651,6 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db, uint16_t flags; int64_t rpos=0; char *seqDNA; - const BAMRefSeq *refSeq; ctx_value_t *value; bool wasInserted; int32_t refSeqId=-1; diff --git a/tools/bam-loader/reference-writer.c b/tools/bam-loader/reference-writer.c index c35ebd6c..071cd85b 100644 --- a/tools/bam-loader/reference-writer.c +++ b/tools/bam-loader/reference-writer.c @@ -66,12 +66,6 @@ struct overlap_s { uint32_t max; /* maximum end pos of any alignment that starts before 
this chunk and ends in this chunk */ }; -struct s_reference_info { - unsigned name; /* offset of start of name in ref_names */ - unsigned id; - unsigned lastOffset; -}; - extern void ReferenceMgr_DumpConfig(ReferenceMgr const *const self); rc_t ReferenceInit(Reference *self, const VDBManager *mgr, VDatabase *db) @@ -85,8 +79,7 @@ rc_t ReferenceInit(Reference *self, const VDBManager *mgr, VDatabase *db) self->coverage.elem_bits = self->mismatches.elem_bits = self->indels.elem_bits = 32; self->pri_align.elem_bits = self->sec_align.elem_bits = 64; self->pri_overlap.elem_bits = self->sec_overlap.elem_bits = sizeof(struct overlap_s) * 8; - self->ref_names.elem_bits = 8; - self->ref_info.elem_bits = 8 * sizeof(struct s_reference_info); + self->ref_info.elem_bits = 8 * sizeof(ReferenceSeq const *); rc = ReferenceMgr_Make(&self->mgr, db, mgr, ewrefmgr_co_Coverage, G.refXRefPath, G.inpath, @@ -263,22 +256,20 @@ static unsigned str__len(char const A[]) } } -static unsigned bsearch_name(char const qry[], char const names[], +static unsigned bsearch_name(ReferenceSeq const *const qry, unsigned const count, - struct s_reference_info const refInfo[], - int found[]) + ReferenceSeq const *const *const refInfo) { unsigned f = 0; unsigned e = count; while (f < e) { unsigned const m = f + ((e - f) >> 1); - char const *const name = &names[refInfo[m].name]; - int const diff = str__cmp(qry, name); + ReferenceSeq const *const fnd = refInfo[m]; - if (diff < 0) + if (qry < fnd) e = m; - else if (diff > 0) + else if (qry > fnd) f = m + 1; else { found[0] = 1; @@ -288,92 +279,37 @@ static unsigned bsearch_name(char const qry[], char const names[], return f; } -static struct s_reference_info s_reference_info_make(unsigned const name, unsigned const id) -{ - struct s_reference_info rslt; - - rslt.name = name; - rslt.id = id; - rslt.lastOffset = 0; - - return rslt; -} - -static unsigned GetLastOffset(Reference const *const self) -{ - if (self->last_id < self->ref_info.elem_count) { - struct 
s_reference_info const *const refInfoBase = self->ref_info.base; - return refInfoBase[self->last_id].lastOffset; - } - return 0; -} - -static void SetLastOffset(Reference *const self, unsigned const newValue) -{ - if (self->last_id < self->ref_info.elem_count) { - struct s_reference_info *const refInfoBase = self->ref_info.base; - refInfoBase[self->last_id].lastOffset = newValue; - } -} - rc_t ReferenceSetFile(Reference *const self, char const id[], uint64_t const length, uint8_t const md5[16], bool *const shouldUnmap, bool *const wasRenamed) { - ReferenceSeq const *rseq; - int found = 0; + ReferenceSeq const *rseq = NULL; unsigned at = 0; - if (self->last_id < self->ref_info.elem_count) { - struct s_reference_info const *const refInfoBase = self->ref_info.base; - struct s_reference_info const refInfo = refInfoBase[self->last_id]; - char const *const nameBase = self->ref_names.base; - char const *const last = nameBase + refInfo.id; - - if (str__equal(id, last)) { - return 0; - } - } - BAIL_ON_FAIL(FlushBuffers(self, self->length, true, true)); BAIL_ON_FAIL(ReferenceMgr_GetSeq(self->mgr, &rseq, id, shouldUnmap, G.allowMultiMapping, wasRenamed)); + if (self->rseq == rseq) + return 0; - self->rseq = rseq; - - at = bsearch_name(id, self->ref_names.base, self->ref_info.elem_count, self->ref_info.base, &found); - if (!found) { - unsigned const len = str__len(id); - unsigned const name_at = self->ref_names.elem_count; - unsigned const id_at = name_at; - struct s_reference_info const new_elem = s_reference_info_make(name_at, id_at); - rc_t const rc = KDataBufferResize(&self->ref_names, name_at + len + 1); + at = bsearch_name(rseq, self->ref_info.elem_count, self->ref_info.base); + if (at == self->ref_info.elem_count || ((ReferenceSeq const **)self->ref_info.base)[at] != rseq) { + unsigned const count = (unsigned)self->ref_info.elem_count; + rc_t const rc = KDataBufferResize(&self->ref_info, count + 1); + struct s_reference_info *const refInfoBase = self->ref_info.base; if 
(rc) return rc; - else { - unsigned const count = (unsigned)self->ref_info.elem_count; - rc_t const rc = KDataBufferResize(&self->ref_info, count + 1); - struct s_reference_info *const refInfoBase = self->ref_info.base; - - if (rc) - return rc; - - memmove(((char *)self->ref_names.base) + name_at, id, len + 1); - memmove(refInfoBase + at + 1, refInfoBase + at, (count - at) * sizeof(*refInfoBase)); - refInfoBase[at] = new_elem; - } - (void)PLOGMSG(klogInfo, (klogInfo, "Processing Reference '$(id)'", "id=%s", id)); - if (*wasRenamed) { - char const *actid = NULL; - ReferenceSeq_GetID(rseq, &actid); - (void)PLOGMSG(klogInfo, (klogInfo, "Reference '$(id)' was renamed to '$(actid)'", "id=%s,actid=%s", id, actid)); - } + + memmove(refInfoBase + at + 1, refInfoBase + at, (count - at) * sizeof(*refInfoBase)); + refInfoBase[at] = rseq; } - else if (!self->out_of_order) + else if (!self->out_of_order) { Unsorted(self); - - self->last_id = at; + } + self->rseq = rseq; + + self->lastOffset = 0; self->curPos = self->endPos = 0; self->length = (unsigned)length; KDataBufferResize(&self->pri_overlap, 0); @@ -539,22 +475,16 @@ rc_t ReferenceRead(Reference *self, AlignmentRecord *data, uint64_t const pos, GetCounts(data, seqLen, &nmatch, &nmis, &indels); *matches = nmatch; *misses = nmis; -/* removed before more comlete implementation - EY - if (!G.acceptNoMatch && data->data.ref_len == 0) - return RC(rcApp, rcFile, rcReading, rcConstraint, rcViolated); -***********************/ - if (!self->out_of_order && pos < GetLastOffset(self)) { + if (!self->out_of_order && pos < self->lastOffset) { return Unsorted(self); } if (!self->out_of_order) { - SetLastOffset(self, data->data.effective_offset); + self->lastOffset = (unsigned)data->data.effective_offset; - /* if (G.acceptNoMatch || nmatch >= G.minMatchCount) --- removed before more comlete implementation - EY ***/ - return ReferenceAddCoverage(self, data->data.effective_offset, - data->data.ref_len, nmis, indels, - data->isPrimary); - 
/* else return RC(rcApp, rcFile, rcReading, rcConstraint, rcViolated); --- removed before more comlete implementation - EY ***/ + return ReferenceAddCoverage(self, data->data.effective_offset, + data->data.ref_len, nmis, indels, + data->isPrimary); } return 0; } @@ -599,7 +529,6 @@ rc_t ReferenceWhack(Reference *self, bool commit) KDataBufferWhack(&self->coverage); KDataBufferWhack(&self->pri_overlap); KDataBufferWhack(&self->sec_overlap); - KDataBufferWhack(&self->ref_names); KDataBufferWhack(&self->ref_info); if (self->rseq) rc = ReferenceSeq_Release(self->rseq); diff --git a/tools/bam-loader/reference-writer.h b/tools/bam-loader/reference-writer.h index 2946c82d..007740e6 100644 --- a/tools/bam-loader/reference-writer.h +++ b/tools/bam-loader/reference-writer.h @@ -41,7 +41,7 @@ typedef struct s_reference { unsigned curPos; unsigned endPos; unsigned length; - unsigned last_id; /* == ref_info.elem_count if no last id */ + unsigned lastOffset; KDataBuffer coverage; KDataBuffer mismatches; @@ -50,7 +50,6 @@ typedef struct s_reference { KDataBuffer sec_align; KDataBuffer pri_overlap; KDataBuffer sec_overlap; - KDataBuffer ref_names; KDataBuffer ref_info; bool out_of_order; diff --git a/tools/sra-dump/fastq.c b/tools/sra-dump/fastq.c index 868443f9..0ee34956 100644 --- a/tools/sra-dump/fastq.c +++ b/tools/sra-dump/fastq.c @@ -3694,7 +3694,7 @@ rc_t SRADumper_Init( SRADumperFmt* fmt ) /* DO NOT ADD IN THE MIDDLE ORDER IS IMPORTANT IN USAGE FUNCTION ABOVE!!! 
*/ {NULL, "split-spot", NULL, {"Split spots into individual reads", NULL}}, /* H_splip_sot = 0 */ - {"W", "clip", NULL, {"Clip adapter sequences", NULL}}, /* H_clip = 1 */ + {"W", "clip", NULL, {"Remove adapter sequences from reads", NULL}}, /* H_clip = 1 */ {"M", "minReadLen", "len", {"Filter by sequence length >= ", NULL}}, /* H_minReadLen = 2 */ {"E", "qual-filter", NULL, {"Filter used in early 1000 Genomes data:", /* H_qual_filter = 3 */ diff --git a/tools/util/test-sra.c b/tools/util/test-sra.c index c61ea626..09f2bd57 100644 --- a/tools/util/test-sra.c +++ b/tools/util/test-sra.c @@ -3438,11 +3438,11 @@ static rc_t MainFreeSpace ( const Main * self, const KDirectory * dir ) { if ( self -> xml ) OUTMSG ( ( - " \n", + " \n", free_bytes_available, total_number_of_bytes ) ); else OUTMSG ( ( - " Space free=\"%d\" total=\"%d\" units=\"KBytes\"\n", + " Space free=\"%lu\" total=\"%lu\" units=\"KBytes\"\n", free_bytes_available, total_number_of_bytes ) ); return rc; diff --git a/tools/vdb-config/vdb-config.c b/tools/vdb-config/vdb-config.c index f4b9e562..cec49c0e 100644 --- a/tools/vdb-config/vdb-config.c +++ b/tools/vdb-config/vdb-config.c @@ -1672,6 +1672,8 @@ rc_t CC KMain(int argc, char* argv[]) { } } } + else + rc = 0; } RELEASE ( KDirectory, d );