Permalink
...
Checking mergeability…
Don’t worry, you can still create the pull request.
Comparing changes
Open a pull request
- 12 commits
- 12 files changed
- 0 commit comments
- 4 contributors
Commits on Mar 06, 2017
|
|
kwrodarmer |
58fc9c0
|
Commits on Mar 07, 2017
|
|
durbrow |
fdd9971
|
Commits on Mar 10, 2017
|
|
klymenko |
c79570e
|
|||
|
|
klymenko |
bf6c6ca
|
|||
|
|
klymenko |
bf7dc4d
|
|||
|
|
klymenko |
a549071
|
Commits on Mar 13, 2017
|
|
durbrow |
46953fc
|
|||
|
|
durbrow |
6650c16
|
Commits on Mar 14, 2017
|
|
aboshkin |
1363078
|
|||
|
|
aboshkin |
53f931c
|
Commits on Mar 16, 2017
|
|
klymenko |
5bc31d8
|
Commits on Mar 27, 2017
|
|
durbrow |
4c9e1e1
|
Unified
Split
Showing
with
125 additions
and 109 deletions.
- +30 −0 CHANGES.md
- +5 −5 build/Makefile.env
- +1 −0 test/Makefile
- +35 −0 test/vdb-config/Makefile
- +12 −0 test/vdb-config/test-vdb-config.pl
- +8 −0 test/vdb-dump/Makefile
- +1 −1 tools/bam-loader/loader-imp.c
- +27 −98 tools/bam-loader/reference-writer.c
- +1 −2 tools/bam-loader/reference-writer.h
- +1 −1 tools/sra-dump/fastq.c
- +2 −2 tools/util/test-sra.c
- +2 −0 tools/vdb-config/vdb-config.c
View
30
CHANGES.md
| @@ -1,6 +1,36 @@ | ||
| # NCBI External Developer Release: | ||
| +## SRA Toolkit 2.8.2 | ||
| +**March 6, 2017** | ||
| + | ||
| + **blast**: Updated blast library to be able to process runs having empty rows | ||
| + **blast, build**: removed library dependencies that were preventing users from launching these tools | ||
| + **blast, sra-tools**: Prepared completely static build of blast tools for windows with HTTPS support | ||
| + **build**: **bam-load**: changed memcpy to memmove when regions overlap - seems to corrupt data on centos7 | ||
| + **build**: Added ability to specify ncbi-vdb/configure --with-magic-prefix. Look for libraries in (lib lib64) when running "configure --with-...-prefix" | ||
| + **build**: configure detects location of ngs libraries | ||
| + **build**: configure was fixed to skip options unrecognized by gcc 4.4.7 | ||
| + **build**: created sra-toolkit Debian package | ||
| + **build**: fixed a bug in 'configure' when it could not find source files in repository saved with non-standard name | ||
| + **build, ncbi-vdb, sra-tools**: installation will back up old configuration files if they differ from the ones being installed | ||
| + **cg-load**: added loading of CG File format v2.4 | ||
| + **kns**: SRA tools respect standard set of environment variables for proxy specification | ||
| + **kns**: updated mbedtls library to version 2.4.1 | ||
| + **ncbi-vdb, ngs, ngs-tools, sra-tools**: eliminated memcpy from sources due to potential for overlap | ||
| + **ngs, sra-search**: now supports search on reference | ||
| + **ngs-tools**: updated the NCBI download page to incorporate ngs versions into 3rd party package names | ||
| + **prefetch**: Fixed error message "path excessive while opening directory" when prefetch is trying to get vdbcache | ||
| + **prefetch**: Fixed regression in prefetch-2.8.1 when downloading dbGaP files via HTTP | ||
| + **prefetch**: Fixed regression in prefetch-2.8.1 when downloading vdbcache files from dbGaP via HTTP | ||
| + **sam-dump**: consistency of sam-dump in fastq-mod improved | ||
| + **sam-dump**: consistency of sam-dump in fastq-mode improved | ||
| + **sra-stat**: sra-stat does XML-escaping when printing spot-groups | ||
| + **test-sra**: extended test-sra to debug user problems with https connections to NCBI | ||
| + **test-sra**: test-sra prints amount of available disk space in user repositories | ||
| + **vdb-config**: vdb-config correctly works when there is non-canonical path in configuration | ||
| + | ||
| + | ||
| ## SRA Toolkit 2.8.1-2 | ||
| **January 19, 2017** | ||
View
10
build/Makefile.env
| @@ -200,7 +200,7 @@ endif | ||
| .PHONY: stdclean removelinks makedirs vers-includes rebuild-dirlinks | ||
| .PHONY: stdjclean makejdirs | ||
| -# configuration targets | ||
| +# configuration targets | ||
| out: | ||
| @ echo $(OUTDIR) > $(TOP)/build/OUTDIR.$(BUILD_OS) | ||
| @ $(MAKE) TOP=$(TOP) -f $(TOP)/build/Makefile.env rebuild-dirlinks | ||
| @@ -362,11 +362,11 @@ LDFLAGS = $(DBG) $(PROF) $(CARCH) $(MIN_DEPLOY_OS_OPT) $(LDPATHS) | ||
| #------------------------------------------------------------------------------- | ||
| # runtests | ||
| -# | ||
| +# | ||
| # MallocScribble=1 is for catching allocation problems on Mac | ||
| # | ||
| ifeq ($(RUNTESTS_OVERRIDE),) | ||
| -runtests: std $(TEST_TOOLS) | ||
| +runtests: all $(TEST_TOOLS) | ||
| @ export VDB_CONFIG=$(VDB_CONFIG);export LD_LIBRARY_PATH=$(LIBDIR):$$LD_LIBRARY_PATH;export MallocScribble=1;\ | ||
| for i in $(TEST_TOOLS);\ | ||
| do\ | ||
| @@ -380,11 +380,11 @@ endif | ||
| #------------------------------------------------------------------------------- | ||
| # slowtests | ||
| -# | ||
| +# | ||
| # $(SLOWTESTSDATADIR) should be used to create temporary test files | ||
| SLOWTESTSDATADIR ?= /panfs/pan1.be-md.ncbi.nlm.nih.gov/sra-test/slowtests/$(shell whoami) | ||
| -slowtests: std $(SLOW_TEST_TOOLS) | ||
| +slowtests: all $(SLOW_TEST_TOOLS) | ||
| @ export LD_LIBRARY_PATH=$(LIBDIR):$$LD_LIBRARY_PATH;\ | ||
| for i in $(SLOW_TEST_TOOLS);\ | ||
| do\ | ||
View
1
test/Makefile
| @@ -35,6 +35,7 @@ include $(TOP)/build/Makefile.config | ||
| # default | ||
| # | ||
| SUBDIRS = \ | ||
| + vdb-config \ | ||
| fastq-loader \ | ||
| vcf-loader \ | ||
| kget \ | ||
View
35
test/vdb-config/Makefile
| @@ -0,0 +1,35 @@ | ||
| +# =========================================================================== | ||
| +# | ||
| +# PUBLIC DOMAIN NOTICE | ||
| +# National Center for Biotechnology Information | ||
| +# | ||
| +# This software/database is a "United States Government Work" under the | ||
| +# terms of the United States Copyright Act. It was written as part of | ||
| +# the author's official duties as a United States Government employee and | ||
| +# thus cannot be copyrighted. This software/database is freely available | ||
| +# to the public for use. The National Library of Medicine and the U.S. | ||
| +# Government have not placed any restriction on its use or reproduction. | ||
| +# | ||
| +# Although all reasonable efforts have been taken to ensure the accuracy | ||
| +# and reliability of the software and data, the NLM and the U.S. | ||
| +# Government do not and cannot warrant the performance or results that | ||
| +# may be obtained by using this software or data. The NLM and the U.S. | ||
| +# Government disclaim all warranties, express or implied, including | ||
| +# warranties of performance, merchantability or fitness for any particular | ||
| +# purpose. | ||
| +# | ||
| +# Please cite the author in any work or product based on this material. | ||
| +# | ||
| +# =========================================================================== | ||
| + | ||
| +default: runtests | ||
| + | ||
| +TOP ?= $(abspath ../..) | ||
| +include $(TOP)/build/Makefile.env # BINDIR | ||
| + | ||
| +runtests: test-vdb-config | ||
| + | ||
| +test-vdb-config: | ||
| + @ printf "Testing exit code vdb-config of vdb-config... " | ||
| + @ PATH=$(BINDIR):$(PATH) ; ./test-vdb-config.pl | ||
| + @ echo OK |
View
12
test/vdb-config/test-vdb-config.pl
| @@ -0,0 +1,12 @@ | ||
| +#!/usr/bin/perl -w | ||
| + | ||
| +use strict; | ||
| + | ||
| +use File::Temp "tempdir"; | ||
| + | ||
| +my $tmp = tempdir ( "phgvXXXX", CLEANUP => 1 ); | ||
| +$ENV{VDB_CONFIG}=$tmp; | ||
| +$ENV{NCBI_SETTINGS}="$tmp/u.mkfg"; | ||
| + | ||
| +`vdb-config -s foo=bar`; | ||
| +die "vdb-config exited with " . ( $? >> 8 ) if ( $? ); |
View
8
test/vdb-dump/Makefile
| @@ -53,6 +53,14 @@ $(ALL_TOOLS): makedirs | ||
| .PHONY: all std $(ALL_TOOLS) | ||
| #------------------------------------------------------------------------------- | ||
| +# all | ||
| +# | ||
| +$(TARGDIR)/all: \ | ||
| + $(addprefix $(BINDIR)/,$(ALL_TOOLS)) | ||
| + | ||
| +.PHONY: $(TARGDIR)/all | ||
| + | ||
| +#------------------------------------------------------------------------------- | ||
| # std | ||
| # | ||
| $(TARGDIR)/std: \ | ||
View
2
tools/bam-loader/loader-imp.c
| @@ -1548,6 +1548,7 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db, | ||
| KDataBuffer fragBuf; | ||
| KDataBuffer cigBuf; | ||
| rc_t rc; | ||
| + const BAMRefSeq *refSeq = NULL; | ||
| int32_t lastRefSeqId = -1; | ||
| bool wasRenamed = false; | ||
| size_t rsize; | ||
| @@ -1650,7 +1651,6 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db, | ||
| uint16_t flags; | ||
| int64_t rpos=0; | ||
| char *seqDNA; | ||
| - const BAMRefSeq *refSeq; | ||
| ctx_value_t *value; | ||
| bool wasInserted; | ||
| int32_t refSeqId=-1; | ||
View
125
tools/bam-loader/reference-writer.c
| @@ -66,12 +66,6 @@ struct overlap_s { | ||
| uint32_t max; /* maximum end pos of any alignment that starts before this chunk and ends in this chunk */ | ||
| }; | ||
| -struct s_reference_info { | ||
| - unsigned name; /* offset of start of name in ref_names */ | ||
| - unsigned id; | ||
| - unsigned lastOffset; | ||
| -}; | ||
| - | ||
| extern void ReferenceMgr_DumpConfig(ReferenceMgr const *const self); | ||
| rc_t ReferenceInit(Reference *self, const VDBManager *mgr, VDatabase *db) | ||
| @@ -85,8 +79,7 @@ rc_t ReferenceInit(Reference *self, const VDBManager *mgr, VDatabase *db) | ||
| self->coverage.elem_bits = self->mismatches.elem_bits = self->indels.elem_bits = 32; | ||
| self->pri_align.elem_bits = self->sec_align.elem_bits = 64; | ||
| self->pri_overlap.elem_bits = self->sec_overlap.elem_bits = sizeof(struct overlap_s) * 8; | ||
| - self->ref_names.elem_bits = 8; | ||
| - self->ref_info.elem_bits = 8 * sizeof(struct s_reference_info); | ||
| + self->ref_info.elem_bits = 8 * sizeof(ReferenceSeq const *); | ||
| rc = ReferenceMgr_Make(&self->mgr, db, mgr, ewrefmgr_co_Coverage, | ||
| G.refXRefPath, G.inpath, | ||
| @@ -263,22 +256,20 @@ static unsigned str__len(char const A[]) | ||
| } | ||
| } | ||
| -static unsigned bsearch_name(char const qry[], char const names[], | ||
| +static unsigned bsearch_name(ReferenceSeq const *const qry, | ||
| unsigned const count, | ||
| - struct s_reference_info const refInfo[], | ||
| - int found[]) | ||
| + ReferenceSeq const *const *const refInfo) | ||
| { | ||
| unsigned f = 0; | ||
| unsigned e = count; | ||
| while (f < e) { | ||
| unsigned const m = f + ((e - f) >> 1); | ||
| - char const *const name = &names[refInfo[m].name]; | ||
| - int const diff = str__cmp(qry, name); | ||
| + ReferenceSeq const *const fnd = refInfo[m]; | ||
| - if (diff < 0) | ||
| + if (qry < fnd) | ||
| e = m; | ||
| - else if (diff > 0) | ||
| + else if (qry > fnd) | ||
| f = m + 1; | ||
| else { | ||
| found[0] = 1; | ||
| @@ -288,92 +279,37 @@ static unsigned bsearch_name(char const qry[], char const names[], | ||
| return f; | ||
| } | ||
| -static struct s_reference_info s_reference_info_make(unsigned const name, unsigned const id) | ||
| -{ | ||
| - struct s_reference_info rslt; | ||
| - | ||
| - rslt.name = name; | ||
| - rslt.id = id; | ||
| - rslt.lastOffset = 0; | ||
| - | ||
| - return rslt; | ||
| -} | ||
| - | ||
| -static unsigned GetLastOffset(Reference const *const self) | ||
| -{ | ||
| - if (self->last_id < self->ref_info.elem_count) { | ||
| - struct s_reference_info const *const refInfoBase = self->ref_info.base; | ||
| - return refInfoBase[self->last_id].lastOffset; | ||
| - } | ||
| - return 0; | ||
| -} | ||
| - | ||
| -static void SetLastOffset(Reference *const self, unsigned const newValue) | ||
| -{ | ||
| - if (self->last_id < self->ref_info.elem_count) { | ||
| - struct s_reference_info *const refInfoBase = self->ref_info.base; | ||
| - refInfoBase[self->last_id].lastOffset = newValue; | ||
| - } | ||
| -} | ||
| - | ||
| rc_t ReferenceSetFile(Reference *const self, char const id[], | ||
| uint64_t const length, uint8_t const md5[16], | ||
| bool *const shouldUnmap, | ||
| bool *const wasRenamed) | ||
| { | ||
| - ReferenceSeq const *rseq; | ||
| - int found = 0; | ||
| + ReferenceSeq const *rseq = NULL; | ||
| unsigned at = 0; | ||
| - if (self->last_id < self->ref_info.elem_count) { | ||
| - struct s_reference_info const *const refInfoBase = self->ref_info.base; | ||
| - struct s_reference_info const refInfo = refInfoBase[self->last_id]; | ||
| - char const *const nameBase = self->ref_names.base; | ||
| - char const *const last = nameBase + refInfo.id; | ||
| - | ||
| - if (str__equal(id, last)) { | ||
| - return 0; | ||
| - } | ||
| - } | ||
| - | ||
| BAIL_ON_FAIL(FlushBuffers(self, self->length, true, true)); | ||
| BAIL_ON_FAIL(ReferenceMgr_GetSeq(self->mgr, &rseq, id, shouldUnmap, G.allowMultiMapping, wasRenamed)); | ||
| + if (self->rseq == rseq) | ||
| + return 0; | ||
| - self->rseq = rseq; | ||
| - | ||
| - at = bsearch_name(id, self->ref_names.base, self->ref_info.elem_count, self->ref_info.base, &found); | ||
| - if (!found) { | ||
| - unsigned const len = str__len(id); | ||
| - unsigned const name_at = self->ref_names.elem_count; | ||
| - unsigned const id_at = name_at; | ||
| - struct s_reference_info const new_elem = s_reference_info_make(name_at, id_at); | ||
| - rc_t const rc = KDataBufferResize(&self->ref_names, name_at + len + 1); | ||
| + at = bsearch_name(rseq, self->ref_info.elem_count, self->ref_info.base); | ||
| + if (at == self->ref_info.elem_count || ((ReferenceSeq const **)self->ref_info.base)[at] != rseq) { | ||
| + unsigned const count = (unsigned)self->ref_info.elem_count; | ||
| + rc_t const rc = KDataBufferResize(&self->ref_info, count + 1); | ||
| + struct s_reference_info *const refInfoBase = self->ref_info.base; | ||
| if (rc) | ||
| return rc; | ||
| - else { | ||
| - unsigned const count = (unsigned)self->ref_info.elem_count; | ||
| - rc_t const rc = KDataBufferResize(&self->ref_info, count + 1); | ||
| - struct s_reference_info *const refInfoBase = self->ref_info.base; | ||
| - | ||
| - if (rc) | ||
| - return rc; | ||
| - | ||
| - memmove(((char *)self->ref_names.base) + name_at, id, len + 1); | ||
| - memmove(refInfoBase + at + 1, refInfoBase + at, (count - at) * sizeof(*refInfoBase)); | ||
| - refInfoBase[at] = new_elem; | ||
| - } | ||
| - (void)PLOGMSG(klogInfo, (klogInfo, "Processing Reference '$(id)'", "id=%s", id)); | ||
| - if (*wasRenamed) { | ||
| - char const *actid = NULL; | ||
| - ReferenceSeq_GetID(rseq, &actid); | ||
| - (void)PLOGMSG(klogInfo, (klogInfo, "Reference '$(id)' was renamed to '$(actid)'", "id=%s,actid=%s", id, actid)); | ||
| - } | ||
| + | ||
| + memmove(refInfoBase + at + 1, refInfoBase + at, (count - at) * sizeof(*refInfoBase)); | ||
| + refInfoBase[at] = rseq; | ||
| } | ||
| - else if (!self->out_of_order) | ||
| + else if (!self->out_of_order) { | ||
| Unsorted(self); | ||
| - | ||
| - self->last_id = at; | ||
| + } | ||
| + self->rseq = rseq; | ||
| + | ||
| + self->lastOffset = 0; | ||
| self->curPos = self->endPos = 0; | ||
| self->length = (unsigned)length; | ||
| KDataBufferResize(&self->pri_overlap, 0); | ||
| @@ -539,22 +475,16 @@ rc_t ReferenceRead(Reference *self, AlignmentRecord *data, uint64_t const pos, | ||
| GetCounts(data, seqLen, &nmatch, &nmis, &indels); | ||
| *matches = nmatch; | ||
| *misses = nmis; | ||
| -/* removed before more comlete implementation - EY | ||
| - if (!G.acceptNoMatch && data->data.ref_len == 0) | ||
| - return RC(rcApp, rcFile, rcReading, rcConstraint, rcViolated); | ||
| -***********************/ | ||
| - if (!self->out_of_order && pos < GetLastOffset(self)) { | ||
| + if (!self->out_of_order && pos < self->lastOffset) { | ||
| return Unsorted(self); | ||
| } | ||
| if (!self->out_of_order) { | ||
| - SetLastOffset(self, data->data.effective_offset); | ||
| + self->lastOffset = (unsigned)data->data.effective_offset; | ||
| - /* if (G.acceptNoMatch || nmatch >= G.minMatchCount) --- removed before more comlete implementation - EY ***/ | ||
| - return ReferenceAddCoverage(self, data->data.effective_offset, | ||
| - data->data.ref_len, nmis, indels, | ||
| - data->isPrimary); | ||
| - /* else return RC(rcApp, rcFile, rcReading, rcConstraint, rcViolated); --- removed before more comlete implementation - EY ***/ | ||
| + return ReferenceAddCoverage(self, data->data.effective_offset, | ||
| + data->data.ref_len, nmis, indels, | ||
| + data->isPrimary); | ||
| } | ||
| return 0; | ||
| } | ||
| @@ -599,7 +529,6 @@ rc_t ReferenceWhack(Reference *self, bool commit) | ||
| KDataBufferWhack(&self->coverage); | ||
| KDataBufferWhack(&self->pri_overlap); | ||
| KDataBufferWhack(&self->sec_overlap); | ||
| - KDataBufferWhack(&self->ref_names); | ||
| KDataBufferWhack(&self->ref_info); | ||
| if (self->rseq) | ||
| rc = ReferenceSeq_Release(self->rseq); | ||
View
3
tools/bam-loader/reference-writer.h
| @@ -41,7 +41,7 @@ typedef struct s_reference { | ||
| unsigned curPos; | ||
| unsigned endPos; | ||
| unsigned length; | ||
| - unsigned last_id; /* == ref_info.elem_count if no last id */ | ||
| + unsigned lastOffset; | ||
| KDataBuffer coverage; | ||
| KDataBuffer mismatches; | ||
| @@ -50,7 +50,6 @@ typedef struct s_reference { | ||
| KDataBuffer sec_align; | ||
| KDataBuffer pri_overlap; | ||
| KDataBuffer sec_overlap; | ||
| - KDataBuffer ref_names; | ||
| KDataBuffer ref_info; | ||
| bool out_of_order; | ||
View
2
tools/sra-dump/fastq.c
| @@ -3694,7 +3694,7 @@ rc_t SRADumper_Init( SRADumperFmt* fmt ) | ||
| /* DO NOT ADD IN THE MIDDLE ORDER IS IMPORTANT IN USAGE FUNCTION ABOVE!!! */ | ||
| {NULL, "split-spot", NULL, {"Split spots into individual reads", NULL}}, /* H_splip_sot = 0 */ | ||
| - {"W", "clip", NULL, {"Clip adapter sequences", NULL}}, /* H_clip = 1 */ | ||
| + {"W", "clip", NULL, {"Remove adapter sequences from reads", NULL}}, /* H_clip = 1 */ | ||
| {"M", "minReadLen", "len", {"Filter by sequence length >= <len>", NULL}}, /* H_minReadLen = 2 */ | ||
| {"E", "qual-filter", NULL, {"Filter used in early 1000 Genomes data:", /* H_qual_filter = 3 */ | ||
View
4
tools/util/test-sra.c
| @@ -3438,11 +3438,11 @@ static rc_t MainFreeSpace ( const Main * self, const KDirectory * dir ) { | ||
| if ( self -> xml ) | ||
| OUTMSG ( ( | ||
| - " <Space free=\"%d\" total=\"%d\" units=\"KBytes\"/>\n", | ||
| + " <Space free=\"%lu\" total=\"%lu\" units=\"KBytes\"/>\n", | ||
| free_bytes_available, total_number_of_bytes ) ); | ||
| else | ||
| OUTMSG ( ( | ||
| - " Space free=\"%d\" total=\"%d\" units=\"KBytes\"\n", | ||
| + " Space free=\"%lu\" total=\"%lu\" units=\"KBytes\"\n", | ||
| free_bytes_available, total_number_of_bytes ) ); | ||
| return rc; | ||
Oops, something went wrong.