From 58fc9c0bcdd76a977445837673ea507772c00ac8 Mon Sep 17 00:00:00 2001 From: kwrodarmer Date: Mon, 6 Mar 2017 14:28:20 -0500 Subject: [PATCH 01/18] change log for 2.8.2 --- CHANGES.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index e30b7837..9f02e9e3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,36 @@ # NCBI External Developer Release: +## SRA Toolkit 2.8.2 +**March 6, 2017** + + **blast**: Updated blast library to be able to process runs having empty rows + **blast, build**: removed library dependencies that were preventing users from launching these tools + **blast, sra-tools**: Prepared completely static build of blast tools for windows with HTTPS support + **build**: **bam-load**: changed memcpy to memmove when regions overlap - seems to corrupt data on centos7 + **build**: Added ability to specify ncbi-vdb/configure --with-magic-prefix. Look for libraries in (lib lib64) when running "configure --with-...-prefix" + **build**: configure detects location of ngs libraries + **build**: configure was fixed to skip options unrecognized by gcc 4.4.7 + **build**: created sra-toolkit Debian package + **build**: fixed a bug in 'configure' when it could not find source files in repository saved with non-standard name + **build, ncbi-vdb, sra-tools**: installation will back up old configuration files if they differ from the ones being installed + **cg-load**: added loading of CG File format v2.4 + **kns**: SRA tools respect standard set of environment variables for proxy specification + **kns**: updated mbedtls library to version 2.4.1 + **ncbi-vdb, ngs, ngs-tools, sra-tools**: eliminated memcpy from sources due to potential for overlap + **ngs, sra-search**: now supports search on reference + **ngs-tools**: updated the NCBI download page to incorporate ngs versions into 3rd party package names + **prefetch**: Fixed error message "path excessive while opening directory" when prefetch is trying to get 
vdbcache + **prefetch**: Fixed regression in prefetch-2.8.1 when downloading dbGaP files via HTTP + **prefetch**: Fixed regression in prefetch-2.8.1 when downloading vdbcache files from dbGaP via HTTP + **sam-dump**: consistency of sam-dump in fastq-mod improved + **sam-dump**: consistency of sam-dump in fastq-mode improved + **sra-stat**: sra-stat does XML-escaping when printing spot-groups + **test-sra**: extended test-sra to debug user problems with https connections to NCBI + **test-sra**: test-sra print amount of available disk space in user repositories + **vdb-config**: vdb-config correctly works when there is non-canonical path in configuration + + ## SRA Toolkit 2.8.1-2 **January 19, 2017** From fdd9971703d61e64fc78ba946ed984d6d9796b94 Mon Sep 17 00:00:00 2001 From: "durbrowk@ncbi.nlm.nih.gov" Date: Tue, 7 Mar 2017 14:43:57 -0500 Subject: [PATCH 02/18] initialized variable --- tools/bam-loader/loader-imp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bam-loader/loader-imp.c b/tools/bam-loader/loader-imp.c index f81ea7ce..b78a6358 100644 --- a/tools/bam-loader/loader-imp.c +++ b/tools/bam-loader/loader-imp.c @@ -1650,7 +1650,7 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db, uint16_t flags; int64_t rpos=0; char *seqDNA; - const BAMRefSeq *refSeq; + const BAMRefSeq *refSeq = NULL; ctx_value_t *value; bool wasInserted; int32_t refSeqId=-1; From c79570e7fedb15bbd88ddd8031bf2dd8a9329e95 Mon Sep 17 00:00:00 2001 From: klymenko Date: Fri, 10 Mar 2017 10:17:51 -0500 Subject: [PATCH 03/18] VDB-3272: do not return error when KConfig_Get_Default_User_Path fails --- tools/vdb-config/vdb-config.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/vdb-config/vdb-config.c b/tools/vdb-config/vdb-config.c index f4b9e562..cec49c0e 100644 --- a/tools/vdb-config/vdb-config.c +++ b/tools/vdb-config/vdb-config.c @@ -1672,6 +1672,8 @@ rc_t CC KMain(int argc, char* argv[]) { } } } + else + rc = 0; } RELEASE ( 
KDirectory, d ); From bf6c6caba20427d0bffdbef4fad4a2dbd8bf7705 Mon Sep 17 00:00:00 2001 From: klymenko Date: Fri, 10 Mar 2017 10:17:51 -0500 Subject: [PATCH 04/18] VDB-3272, VDB-3327: do not return error when KConfig_Get_Default_User_Path fails --- tools/vdb-config/vdb-config.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/vdb-config/vdb-config.c b/tools/vdb-config/vdb-config.c index f4b9e562..cec49c0e 100644 --- a/tools/vdb-config/vdb-config.c +++ b/tools/vdb-config/vdb-config.c @@ -1672,6 +1672,8 @@ rc_t CC KMain(int argc, char* argv[]) { } } } + else + rc = 0; } RELEASE ( KDirectory, d ); From a5490719365bea9c74424192596f4c4a1069f6fc Mon Sep 17 00:00:00 2001 From: klymenko Date: Fri, 10 Mar 2017 12:10:49 -0500 Subject: [PATCH 05/18] VDB-3327: added test to check vdb-config failure when /repository/user/default-path is not found --- test/Makefile | 1 + test/vdb-config/Makefile | 35 +++++++++++++++++++++++++++++++++++ test/vdb-config/test-vdb-config.pl | 12 ++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 test/vdb-config/Makefile create mode 100755 test/vdb-config/test-vdb-config.pl diff --git a/test/Makefile b/test/Makefile index 9f6464b2..ae67bbea 100644 --- a/test/Makefile +++ b/test/Makefile @@ -35,6 +35,7 @@ include $(TOP)/build/Makefile.config # default # SUBDIRS = \ + vdb-config \ fastq-loader \ vcf-loader \ kget \ diff --git a/test/vdb-config/Makefile b/test/vdb-config/Makefile new file mode 100644 index 00000000..7813982a --- /dev/null +++ b/test/vdb-config/Makefile @@ -0,0 +1,35 @@ +# =========================================================================== +# +# PUBLIC DOMAIN NOTICE +# National Center for Biotechnology Information +# +# This software/database is a "United States Government Work" under the +# terms of the United States Copyright Act. It was written as part of +# the author's official duties as a United States Government employee and +# thus cannot be copyrighted. 
This software/database is freely available +# to the public for use. The National Library of Medicine and the U.S. +# Government have not placed any restriction on its use or reproduction. +# +# Although all reasonable efforts have been taken to ensure the accuracy +# and reliability of the software and data, the NLM and the U.S. +# Government do not and cannot warrant the performance or results that +# may be obtained by using this software or data. The NLM and the U.S. +# Government disclaim all warranties, express or implied, including +# warranties of performance, merchantability or fitness for any particular +# purpose. +# +# Please cite the author in any work or product based on this material. +# +# =========================================================================== + +default: runtests + +TOP ?= $(abspath ../..) +include $(TOP)/build/Makefile.env # BINDIR + +runtests: test-vdb-config + +test-vdb-config: + @ printf "Testing exit code vdb-config of vdb-config... " + @ PATH=$(BINDIR):$(PATH) ; ./test-vdb-config.pl + @ echo OK diff --git a/test/vdb-config/test-vdb-config.pl b/test/vdb-config/test-vdb-config.pl new file mode 100755 index 00000000..542a5b02 --- /dev/null +++ b/test/vdb-config/test-vdb-config.pl @@ -0,0 +1,12 @@ +#!/usr/bin/perl -w + +use strict; + +use File::Temp "tempdir"; + +my $tmp = tempdir ( "phgvXXXX", CLEANUP => 1 ); +$ENV{VDB_CONFIG}=$tmp; +$ENV{NCBI_SETTINGS}="$tmp/u.mkfg"; + +`vdb-config -s foo=bar`; +die "vdb-config exited with " . ( $? >> 8 ) if ( $? 
); From 46953fc35d2da0e831f10db0dd852f50cbb2ad58 Mon Sep 17 00:00:00 2001 From: "durbrowk@ncbi.nlm.nih.gov" Date: Mon, 13 Mar 2017 10:50:58 -0400 Subject: [PATCH 06/18] previously uninitialized variable hid a scope bug --- tools/bam-loader/loader-imp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bam-loader/loader-imp.c b/tools/bam-loader/loader-imp.c index b78a6358..dc5cdba1 100644 --- a/tools/bam-loader/loader-imp.c +++ b/tools/bam-loader/loader-imp.c @@ -1548,6 +1548,7 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db, KDataBuffer fragBuf; KDataBuffer cigBuf; rc_t rc; + const BAMRefSeq *refSeq = NULL; int32_t lastRefSeqId = -1; bool wasRenamed = false; size_t rsize; @@ -1650,7 +1651,6 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db, uint16_t flags; int64_t rpos=0; char *seqDNA; - const BAMRefSeq *refSeq = NULL; ctx_value_t *value; bool wasInserted; int32_t refSeqId=-1; From 6650c16c550f4c96c389b0fb88026cbad58d5b52 Mon Sep 17 00:00:00 2001 From: "durbrowk@ncbi.nlm.nih.gov" Date: Mon, 13 Mar 2017 11:30:01 -0400 Subject: [PATCH 07/18] changed text --- tools/sra-dump/fastq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/sra-dump/fastq.c b/tools/sra-dump/fastq.c index 868443f9..0ee34956 100644 --- a/tools/sra-dump/fastq.c +++ b/tools/sra-dump/fastq.c @@ -3694,7 +3694,7 @@ rc_t SRADumper_Init( SRADumperFmt* fmt ) /* DO NOT ADD IN THE MIDDLE ORDER IS IMPORTANT IN USAGE FUNCTION ABOVE!!! 
*/ {NULL, "split-spot", NULL, {"Split spots into individual reads", NULL}}, /* H_splip_sot = 0 */ - {"W", "clip", NULL, {"Clip adapter sequences", NULL}}, /* H_clip = 1 */ + {"W", "clip", NULL, {"Remove adapter sequences from reads", NULL}}, /* H_clip = 1 */ {"M", "minReadLen", "len", {"Filter by sequence length >= ", NULL}}, /* H_minReadLen = 2 */ {"E", "qual-filter", NULL, {"Filter used in early 1000 Genomes data:", /* H_qual_filter = 3 */ From 1363078ccf17ade2437cbc122b90ef7267d3eedd Mon Sep 17 00:00:00 2001 From: aboshkin Date: Tue, 14 Mar 2017 13:34:20 -0400 Subject: [PATCH 08/18] VDB-3314 make targets runtests/slowtests now depend on 'all' --- build/Makefile.env | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/build/Makefile.env b/build/Makefile.env index 46cc5f53..9b5f2064 100644 --- a/build/Makefile.env +++ b/build/Makefile.env @@ -200,7 +200,7 @@ endif .PHONY: stdclean removelinks makedirs vers-includes rebuild-dirlinks .PHONY: stdjclean makejdirs -# configuration targets +# configuration targets out: @ echo $(OUTDIR) > $(TOP)/build/OUTDIR.$(BUILD_OS) @ $(MAKE) TOP=$(TOP) -f $(TOP)/build/Makefile.env rebuild-dirlinks @@ -362,11 +362,11 @@ LDFLAGS = $(DBG) $(PROF) $(CARCH) $(MIN_DEPLOY_OS_OPT) $(LDPATHS) #------------------------------------------------------------------------------- # runtests -# +# # MallocScribble=1 is for catching allocation problems on Mac # ifeq ($(RUNTESTS_OVERRIDE),) -runtests: std $(TEST_TOOLS) +runtests: all $(TEST_TOOLS) @ export VDB_CONFIG=$(VDB_CONFIG);export LD_LIBRARY_PATH=$(LIBDIR):$$LD_LIBRARY_PATH;export MallocScribble=1;\ for i in $(TEST_TOOLS);\ do\ @@ -380,11 +380,11 @@ endif #------------------------------------------------------------------------------- # slowtests -# +# # $(SLOWTESTSDATADIR) should be used to create temporary test files SLOWTESTSDATADIR ?= /panfs/pan1.be-md.ncbi.nlm.nih.gov/sra-test/slowtests/$(shell whoami) -slowtests: std $(SLOW_TEST_TOOLS) +slowtests: all 
$(SLOW_TEST_TOOLS) @ export LD_LIBRARY_PATH=$(LIBDIR):$$LD_LIBRARY_PATH;\ for i in $(SLOW_TEST_TOOLS);\ do\ From 53f931c2ee317d9ef6377ad915d77e5fc342269f Mon Sep 17 00:00:00 2001 From: aboshkin Date: Tue, 14 Mar 2017 14:19:48 -0400 Subject: [PATCH 09/18] VDB-3314 make targets runtests/slowtests now depend on 'all', additional fix --- test/vdb-dump/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/vdb-dump/Makefile b/test/vdb-dump/Makefile index 5449f2c5..9bcca8b5 100644 --- a/test/vdb-dump/Makefile +++ b/test/vdb-dump/Makefile @@ -53,6 +53,14 @@ $(ALL_TOOLS): makedirs .PHONY: all std $(ALL_TOOLS) #------------------------------------------------------------------------------- +# all +# +$(TARGDIR)/all: \ + $(addprefix $(BINDIR)/,$(ALL_TOOLS)) + +.PHONY: $(TARGDIR)/all + +#------------------------------------------------------------------------------- # std # $(TARGDIR)/std: \ From 5bc31d8615d51b30e3318e09f009ad004da7ed6d Mon Sep 17 00:00:00 2001 From: klymenko Date: Thu, 16 Mar 2017 11:26:52 -0400 Subject: [PATCH 10/18] VDB-3175: print free disk space as long unsigned --- tools/util/test-sra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/util/test-sra.c b/tools/util/test-sra.c index c61ea626..09f2bd57 100644 --- a/tools/util/test-sra.c +++ b/tools/util/test-sra.c @@ -3438,11 +3438,11 @@ static rc_t MainFreeSpace ( const Main * self, const KDirectory * dir ) { if ( self -> xml ) OUTMSG ( ( - " \n", + " \n", free_bytes_available, total_number_of_bytes ) ); else OUTMSG ( ( - " Space free=\"%d\" total=\"%d\" units=\"KBytes\"\n", + " Space free=\"%lu\" total=\"%lu\" units=\"KBytes\"\n", free_bytes_available, total_number_of_bytes ) ); return rc; From c166a6db7fcadd06cf8e316878310601be4ef4ef Mon Sep 17 00:00:00 2001 From: wraetz Date: Tue, 4 Apr 2017 16:46:17 -0400 Subject: [PATCH 11/18] added enumeration of static columns --- tools/vdb-dump/vdb-dump-coldefs.c | 74 +++++++++++++++++++++++++++++++++++++++ 
tools/vdb-dump/vdb-dump-coldefs.h | 2 ++ tools/vdb-dump/vdb-dump-context.c | 1 + tools/vdb-dump/vdb-dump-context.h | 2 ++ tools/vdb-dump/vdb-dump.c | 28 +++++++++++++++ 5 files changed, 107 insertions(+) diff --git a/tools/vdb-dump/vdb-dump-coldefs.c b/tools/vdb-dump/vdb-dump-coldefs.c index af221650..a6169f7e 100644 --- a/tools/vdb-dump/vdb-dump-coldefs.c +++ b/tools/vdb-dump/vdb-dump-coldefs.c @@ -844,3 +844,77 @@ rc_t vdcd_collect_spread( const struct num_gen * row_set, col_defs * cols, const } return rc; } + +static uint32_t same_values( const VCursor * curs, uint32_t col_idx, int64_t first, uint32_t test_rows ) +{ + uint32_t res = 0; + const void * base; + uint32_t elem_bits, boff, row_len; + rc_t rc = VCursorCellDataDirect( curs, first, col_idx, &elem_bits, &base, &boff, &row_len ); + while ( rc == 0 && res < test_rows && rc == 0 ) + { + const void * base_1; + uint32_t elem_bits_1, boff_1, row_len_1; + rc = VCursorCellDataDirect( curs, first + res + 1, col_idx, &elem_bits_1, &base_1, &boff_1, &row_len_1 ); + if ( rc == 0 ) + { + if ( elem_bits != elem_bits_1 ) return res; + if ( boff != boff_1 ) return res; + if ( row_len != row_len_1 ) return res; + if ( base != base_1 ) return res; + } + res += 1; + } + return res; +} + +static bool vdcd_is_static_column( const VTable *my_table, col_def * col ) +{ + bool res = false; + const VCursor * curs; + rc_t rc = VTableCreateCursorRead( my_table, &curs ); + if ( rc == 0 ) + { + uint32_t idx; + rc = VCursorAddColumn( curs, &idx, "%s", col->name ); + if ( rc == 0 ) + { + rc = VCursorOpen( curs ); + if ( rc == 0 ) + { + int64_t first; + uint64_t count; + rc = VCursorIdRange( curs, idx, &first, &count ); + if ( rc == 0 && count == 0 ) + { + res = ( same_values( curs, idx, first, 100 ) == 100 ); + } + } + } + VCursorRelease( curs ); + } + return res; +} + + +bool vdcd_extract_static_columns( col_defs* defs, const VTable *my_table, const size_t str_limit ) +{ + col_defs * temp_defs; + bool res = vdcd_init( &temp_defs, 
str_limit ); + if ( res ) + { + uint32_t count = vdcd_extract_from_table( temp_defs, my_table ); + uint32_t idx; + for ( idx = 0; idx < count; ++idx ) + { + col_def * col = VectorGet( &(temp_defs->cols), idx ); + if ( col != NULL ) + { + if ( vdcd_is_static_column( my_table, col ) ) + vdcd_append_col( defs, col->name ); + } + } + vdcd_destroy( temp_defs ); + } + return res; +} \ No newline at end of file diff --git a/tools/vdb-dump/vdb-dump-coldefs.h b/tools/vdb-dump/vdb-dump-coldefs.h index fb81d044..5c19bcf8 100644 --- a/tools/vdb-dump/vdb-dump-coldefs.h +++ b/tools/vdb-dump/vdb-dump-coldefs.h @@ -101,6 +101,8 @@ void vdcd_ins_trans_fkt( col_defs* defs, const VSchema *my_schema ); void vdcd_exclude_these_columns( col_defs* defs, const char* column_names ); bool vdcd_get_first_none_static_column_idx( col_defs* defs, const VCursor * cur, uint32_t * idx ); +bool vdcd_extract_static_columns( col_defs* defs, const VTable *my_table, const size_t str_limit ); + rc_t vdcd_collect_spread( const struct num_gen * row_set, col_defs * cols, const VCursor * cursor ); #ifdef __cplusplus diff --git a/tools/vdb-dump/vdb-dump-context.c b/tools/vdb-dump/vdb-dump-context.c index e3ac2d7b..bb0c0a1a 100644 --- a/tools/vdb-dump/vdb-dump-context.c +++ b/tools/vdb-dump/vdb-dump-context.c @@ -597,6 +597,7 @@ static void vdco_evaluate_options( const Args *my_args, ctx->show_blobbing = vdco_get_bool_option( my_args, OPTION_SHOW_BLOBBING, false ); ctx->enum_phys = vdco_get_bool_option( my_args, OPTION_ENUM_PHYS, false ); ctx->enum_readable = vdco_get_bool_option( my_args, OPTION_ENUM_READABLE, false ); + ctx->enum_static = vdco_get_bool_option( my_args, OPTION_ENUM_STATIC, false ); ctx->idx_enum_requested = vdco_get_bool_option( my_args, OPTION_IDX_ENUM, false ); ctx->disable_multithreading = vdco_get_bool_option( my_args, OPTION_NO_MULTITHREAD, false ); ctx->print_info = vdco_get_bool_option( my_args, OPTION_INFO, false ); diff --git a/tools/vdb-dump/vdb-dump-context.h 
b/tools/vdb-dump/vdb-dump-context.h index 2bd5d545..8edd9358 100644 --- a/tools/vdb-dump/vdb-dump-context.h +++ b/tools/vdb-dump/vdb-dump-context.h @@ -68,6 +68,7 @@ extern "C" { #define OPTION_SHOW_BLOBBING "blobbing" #define OPTION_ENUM_PHYS "phys" #define OPTION_ENUM_READABLE "readable" +#define OPTION_ENUM_STATIC "static" #define OPTION_IDX_ENUM "idx-report" #define OPTION_IDX_RANGE "idx-range" #define OPTION_CUR_CACHE "cur-cache" @@ -186,6 +187,7 @@ typedef struct dump_context bool show_blobbing; bool enum_phys; bool enum_readable; + bool enum_static; bool idx_enum_requested; bool idx_range_requested; bool disable_multithreading; diff --git a/tools/vdb-dump/vdb-dump.c b/tools/vdb-dump/vdb-dump.c index fe5eff4e..abac29fa 100644 --- a/tools/vdb-dump/vdb-dump.c +++ b/tools/vdb-dump/vdb-dump.c @@ -104,6 +104,7 @@ static const char * numelemsum_usage[] = { "sum element-count", static const char * show_blobbing_usage[] = { "show blobbing", NULL }; static const char * enum_phys_usage[] = { "enumerate physical columns", NULL }; static const char * enum_readable_usage[] = { "enumerate readable columns", NULL }; +static const char * enum_static_usage[] = { "enumerate static columns", NULL }; static const char * objtype_usage[] = { "report type of object", NULL }; static const char * idx_enum_usage[] = { "enumerate all available index", NULL }; static const char * idx_range_usage[] = { "enumerate values and row-ranges of one index", NULL }; @@ -149,6 +150,7 @@ OptDef DumpOptions[] = { OPTION_SHOW_BLOBBING, NULL, NULL, show_blobbing_usage, 1, false, false }, { OPTION_ENUM_PHYS, NULL, NULL, enum_phys_usage, 1, false, false }, { OPTION_ENUM_READABLE, NULL, NULL, enum_readable_usage, 1, false, false }, + { OPTION_ENUM_STATIC, NULL, NULL, enum_static_usage, 1, false, false }, { OPTION_OBJVER, ALIAS_OBJVER, NULL, objver_usage, 1, false, false }, { OPTION_OBJTS, NULL, NULL, objts_usage, 1, false, false }, { OPTION_OBJTYPE, ALIAS_OBJTYPE, NULL, objtype_usage, 1, false, false }, 
@@ -558,6 +560,24 @@ static bool vdm_extract_or_parse_phys_columns( const p_dump_context ctx, return res; } + +static bool vdm_extract_or_parse_static_columns( const p_dump_context ctx, + const VTable *my_table, + p_col_defs my_col_defs ) +{ + bool res = false; + if ( ctx != NULL && my_col_defs != NULL ) + { + /* the user does not know the column-names or wants all of them */ + res = vdcd_extract_static_columns( my_col_defs, my_table, ctx->max_line_len ); + + if ( ctx->excluded_columns != NULL ) + vdcd_exclude_these_columns( my_col_defs, ctx->excluded_columns ); + } + return res; + +} + /************************************************************************************* dump_tab_table: * called by "dump_db_table()" and "dump_tab()" as a fkt-pointer @@ -1299,6 +1319,7 @@ static rc_t vdm_enum_readable_columns( const VTable *my_table ) return rc; } + /************************************************************************************* enum_tab_columns: * called by "enum_db_columns()" and "dump_table()" as fkt-pointer @@ -1344,6 +1365,13 @@ static rc_t vdm_enum_tab_columns( const p_dump_context ctx, const VTable *my_tab rc = VTableOpenKTableRead( my_table, &ci_ctx.my_ktable ); DISP_RC( rc, "VTableOpenKTableRead() failed" ); } + if ( ctx->enum_static ) + { + extracted = vdm_extract_or_parse_static_columns( ctx, my_table, my_col_defs ); + rc = VTableOpenKTableRead( my_table, &ci_ctx.my_ktable ); + DISP_RC( rc, "VTableOpenKTableRead() failed" ); + + } else { extracted = vdm_extract_or_parse_columns( ctx, my_table, my_col_defs ); From e9b02656f417c0d0464dbb2738f900cf439c4a9f Mon Sep 17 00:00:00 2001 From: wraetz Date: Mon, 10 Apr 2017 12:26:23 -0400 Subject: [PATCH 12/18] function added to just enumerate or print static columns --- tools/vdb-dump/vdb-dump-coldefs.c | 22 +++++++++++++++------- tools/vdb-dump/vdb-dump-coldefs.h | 2 +- tools/vdb-dump/vdb-dump.c | 29 ++++++++++++++++++++++++++--- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git 
a/tools/vdb-dump/vdb-dump-coldefs.c b/tools/vdb-dump/vdb-dump-coldefs.c index a6169f7e..5fd1c4d7 100644 --- a/tools/vdb-dump/vdb-dump-coldefs.c +++ b/tools/vdb-dump/vdb-dump-coldefs.c @@ -868,7 +868,7 @@ static uint32_t same_values( const VCursor * curs, uint32_t col_idx, int64_t fir return res; } -static bool vdcd_is_static_column( const VTable *my_table, col_def * col ) +static bool vdcd_is_static_column( const VTable *my_table, col_def * col, uint32_t test_rows ) { bool res = false; const VCursor * curs; @@ -887,7 +887,7 @@ static bool vdcd_is_static_column( const VTable *my_table, col_def * col ) rc = VCursorIdRange( curs, idx, &first, &count ); if ( rc == 0 && count == 0 ) { - res = ( same_values( curs, idx, first, 100 ) == 100 ); + res = ( same_values( curs, idx, first, test_rows ) == test_rows ); } } } @@ -897,11 +897,13 @@ static bool vdcd_is_static_column( const VTable *my_table, col_def * col ) } -bool vdcd_extract_static_columns( col_defs* defs, const VTable *my_table, const size_t str_limit ) +#define TEST_ROWS 20 + +uint32_t vdcd_extract_static_columns( col_defs* defs, const VTable *my_table, const size_t str_limit ) { col_defs * temp_defs; - bool res = vdcd_init( &temp_defs, str_limit ); - if ( res ) + uint32_t res = 0; + if ( vdcd_init( &temp_defs, str_limit ) ) { uint32_t count = vdcd_extract_from_table( temp_defs, my_table ); uint32_t idx; @@ -910,8 +912,14 @@ bool vdcd_extract_static_columns( col_defs* defs, const VTable *my_table, const col_def * col = VectorGet( &(temp_defs->cols), idx ); if ( col != NULL ) { - if ( vdcd_is_static_column( my_table, col ) ) - vdcd_append_col( defs, col->name ); + if ( vdcd_is_static_column( my_table, col, TEST_ROWS ) ) + { + p_col_def c = vdcd_append_col( defs, col->name ); + if ( c != NULL ) + { + res++; + } + } } } vdcd_destroy( temp_defs ); diff --git a/tools/vdb-dump/vdb-dump-coldefs.h b/tools/vdb-dump/vdb-dump-coldefs.h index 5c19bcf8..c0c48416 100644 --- a/tools/vdb-dump/vdb-dump-coldefs.h +++ 
b/tools/vdb-dump/vdb-dump-coldefs.h @@ -101,7 +101,7 @@ void vdcd_ins_trans_fkt( col_defs* defs, const VSchema *my_schema ); void vdcd_exclude_these_columns( col_defs* defs, const char* column_names ); bool vdcd_get_first_none_static_column_idx( col_defs* defs, const VCursor * cur, uint32_t * idx ); -bool vdcd_extract_static_columns( col_defs* defs, const VTable *my_table, const size_t str_limit ); +uint32_t vdcd_extract_static_columns( col_defs* defs, const VTable *my_table, const size_t str_limit ); rc_t vdcd_collect_spread( const struct num_gen * row_set, col_defs * cols, const VCursor * cursor ); diff --git a/tools/vdb-dump/vdb-dump.c b/tools/vdb-dump/vdb-dump.c index abac29fa..a058c4c2 100644 --- a/tools/vdb-dump/vdb-dump.c +++ b/tools/vdb-dump/vdb-dump.c @@ -524,8 +524,29 @@ static uint32_t vdm_extract_or_parse_columns( const p_dump_context ctx, { bool cols_unknown = ( ( ctx->columns == NULL ) || ( string_cmp( ctx->columns, 1, "*", 1, 1 ) == 0 ) ); if ( cols_unknown ) - /* the user does not know the column-names or wants all of them */ - count = vdcd_extract_from_table( my_col_defs, my_table ); + { + if ( ctx->enum_static ) + { + /* the user wants to see only the static columns */ + count = vdcd_extract_static_columns( my_col_defs, my_table, ctx->max_line_len ); + if ( count > 0 ) + { + /* if we found some static columns, let's restrict the row-count + if the user did not give a specific row-set to just show row #1 */ + if ( ctx->rows == NULL ) + { + rc_t rc = num_gen_make_from_range( &ctx->rows, 1, 1 ); + DISP_RC( rc, "num_gen_make_from_range() failed" ); + } + + } + } + else + { + /* the user does not know the column-names or wants all of them */ + count = vdcd_extract_from_table( my_col_defs, my_table ); + } + } else /* the user knows the names of the wanted columns... 
*/ count = vdcd_parse_string( my_col_defs, ctx->columns, my_table ); @@ -547,8 +568,10 @@ static bool vdm_extract_or_parse_phys_columns( const p_dump_context ctx, { bool cols_unknown = ( ( ctx->columns == NULL ) || ( string_cmp( ctx->columns, 1, "*", 1, 1 ) == 0 ) ); if ( cols_unknown ) + { /* the user does not know the column-names or wants all of them */ res = vdcd_extract_from_phys_table( my_col_defs, my_table ); + } else /* the user knows the names of the wanted columns... */ res = vdcd_parse_string( my_col_defs, ctx->columns, my_table ); @@ -569,7 +592,7 @@ static bool vdm_extract_or_parse_static_columns( const p_dump_context ctx, if ( ctx != NULL && my_col_defs != NULL ) { /* the user does not know the column-names or wants all of them */ - res = vdcd_extract_static_columns( my_col_defs, my_table, ctx->max_line_len ); + res = ( vdcd_extract_static_columns( my_col_defs, my_table, ctx->max_line_len ) > 0 ); if ( ctx->excluded_columns != NULL ) vdcd_exclude_these_columns( my_col_defs, ctx->excluded_columns ); From 1017a588696ebcd5f8c11fe51e88c475a9814542 Mon Sep 17 00:00:00 2001 From: wraetz Date: Wed, 12 Apr 2017 15:40:28 -0400 Subject: [PATCH 13/18] fastq-option fixed --- tools/vdb-dump/vdb-dump-coldefs.c | 24 ++++++++++++++++++++++-- tools/vdb-dump/vdb-dump-fastq.c | 17 ++++++++++------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/tools/vdb-dump/vdb-dump-coldefs.c b/tools/vdb-dump/vdb-dump-coldefs.c index 5fd1c4d7..af888703 100644 --- a/tools/vdb-dump/vdb-dump-coldefs.c +++ b/tools/vdb-dump/vdb-dump-coldefs.c @@ -868,7 +868,7 @@ static uint32_t same_values( const VCursor * curs, uint32_t col_idx, int64_t fir return res; } -static bool vdcd_is_static_column( const VTable *my_table, col_def * col, uint32_t test_rows ) +static bool vdcd_is_static_column1( const VTable *my_table, col_def * col, uint32_t test_rows ) { bool res = false; const VCursor * curs; @@ -896,6 +896,26 @@ static bool vdcd_is_static_column( const VTable *my_table, col_def * 
col, uint32 return res; } +static bool vdcd_is_static_column2( const VTable *my_table, col_def * col ) +{ + bool res = false; + const VCursor * curs; + rc_t rc = VTableCreateCursorRead( my_table, &curs ); + if ( rc == 0 ) + { + uint32_t idx; + rc = VCursorAddColumn( curs, &idx, "%s", col->name ); + if ( rc == 0 ) + { + rc = VCursorOpen( curs ); + if ( rc == 0 ) + rc = VCursorIsStaticColumn( curs, idx, &res ); + } + VCursorRelease( curs ); + } + return res; +} + #define TEST_ROWS 20 @@ -912,7 +932,7 @@ uint32_t vdcd_extract_static_columns( col_defs* defs, const VTable *my_table, co col_def * col = VectorGet( &(temp_defs->cols), idx ); if ( col != NULL ) { - if ( vdcd_is_static_column( my_table, col, TEST_ROWS ) ) + if ( vdcd_is_static_column1( my_table, col ) ) { p_col_def c = vdcd_append_col( defs, col->name ); if ( c != NULL ) diff --git a/tools/vdb-dump/vdb-dump-fastq.c b/tools/vdb-dump/vdb-dump-fastq.c index d02fef3b..9bc6f8e6 100644 --- a/tools/vdb-dump/vdb-dump-fastq.c +++ b/tools/vdb-dump/vdb-dump-fastq.c @@ -464,15 +464,18 @@ static rc_t print_qual( const char * qual, uint32_t count, uint32_t max_line_len rc = KOutMsg( "%s", buffer ); on_line = num_writ; } - if ( ( on_line + num_writ + 1 ) < max_line_len ) - { - rc = KOutMsg( " %s", buffer ); - on_line += ( num_writ + 1 ); - } else { - rc = KOutMsg( "\n%s", buffer ); - on_line = num_writ; + if ( ( on_line + num_writ + 1 ) < max_line_len ) + { + rc = KOutMsg( " %s", buffer ); + on_line += ( num_writ + 1 ); + } + else + { + rc = KOutMsg( "\n%s", buffer ); + on_line = num_writ; + } } i++; } From 88f574e1b2f721986a64734597915ac16db4493f Mon Sep 17 00:00:00 2001 From: wraetz Date: Wed, 12 Apr 2017 16:37:33 -0400 Subject: [PATCH 14/18] test function needed one more argument --- tools/vdb-dump/vdb-dump-coldefs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/vdb-dump/vdb-dump-coldefs.c b/tools/vdb-dump/vdb-dump-coldefs.c index af888703..1e13a2a5 100644 --- 
a/tools/vdb-dump/vdb-dump-coldefs.c +++ b/tools/vdb-dump/vdb-dump-coldefs.c @@ -932,7 +932,7 @@ uint32_t vdcd_extract_static_columns( col_defs* defs, const VTable *my_table, co col_def * col = VectorGet( &(temp_defs->cols), idx ); if ( col != NULL ) { - if ( vdcd_is_static_column1( my_table, col ) ) + if ( vdcd_is_static_column1( my_table, col, TEST_ROWS ) ) { p_col_def c = vdcd_append_col( defs, col->name ); if ( c != NULL ) From 30451549ad2de94b40e7b535ffe3890a1d5d7039 Mon Sep 17 00:00:00 2001 From: kwrodarmer Date: Wed, 26 Apr 2017 12:24:19 -0400 Subject: [PATCH 15/18] spelling modification --- tools/vdb-config/interactive.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/vdb-config/interactive.cpp b/tools/vdb-config/interactive.cpp index 66af49f4..6bb0903e 100644 --- a/tools/vdb-config/interactive.cpp +++ b/tools/vdb-config/interactive.cpp @@ -883,7 +883,7 @@ bool vdbconf_controller::on_set_location_error( Dlg &dlg, ESetRootState s ) case eSetRootState_NewDirNotEmpty : vdbconf_msg( dlg, r, "the given location is not empty" ); break; case eSetRootState_NewNotDir : vdbconf_msg( dlg, r, "new location is not a directory" ); break; case eSetRootState_Error : vdbconf_msg( dlg, r, "error changing location" ); break; - default : vdbconf_msg( dlg, r, "unknow enum" ); break; + default : vdbconf_msg( dlg, r, "unknown enum" ); break; } return result; } From e919017db37385330723a2c9cc0dd06f36642467 Mon Sep 17 00:00:00 2001 From: kwrodarmer Date: Wed, 26 Apr 2017 12:27:24 -0400 Subject: [PATCH 16/18] switched vTable based upon existence of a mapping file --- tools/sra-sort/buff-writer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/sra-sort/buff-writer.c b/tools/sra-sort/buff-writer.c index 9c4b9e48..70ceadc5 100644 --- a/tools/sra-sort/buff-writer.c +++ b/tools/sra-sort/buff-writer.c @@ -761,7 +761,8 @@ ColumnWriter *cSRATblPairMakeBufferedIdRemapColumnWriter ( cSRATblPair *self, TRY ( buff = MemAlloc ( ctx, sizeof * 
buff, true ) ) { - TRY ( ColumnWriterInit ( & buff -> dad, ctx, & MappedBufferedPairColWriter_vt, idx != NULL ) ) + TRY ( ColumnWriterInit ( & buff -> dad, ctx, + idx != NULL ? & MappedBufferedPairColWriter_vt : & UnmappedBufferedPairColWriter_vt, idx != NULL ) ) { /* duplicate our friend */ TRY ( buff -> cw = ColumnWriterDuplicate ( writer, ctx ) ) From 57064f4b5f1316a6a15462e0d01563e4da7bfa6e Mon Sep 17 00:00:00 2001 From: klymenko Date: Fri, 19 May 2017 12:26:51 -0400 Subject: [PATCH 17/18] VDB-3347: cSRAPairMake: exclude_tbls should be NULL-terminated --- tools/sra-sort/csra-pair.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/sra-sort/csra-pair.c b/tools/sra-sort/csra-pair.c index f58d9ffc..ff2bb3e0 100644 --- a/tools/sra-sort/csra-pair.c +++ b/tools/sra-sort/csra-pair.c @@ -214,7 +214,8 @@ DbPair *cSRAPairMake ( const ctx_t *ctx, "PRIMARY_ALIGNMENT", "REFERENCE", "SECONDARY_ALIGNMENT", - "SEQUENCE" + "SEQUENCE", + NULL }; db -> dad . exclude_tbls = exclude_tbls; From af5b85b9dce21cb6617724fad7f00a1e4827a09c Mon Sep 17 00:00:00 2001 From: kwrodarmer Date: Thu, 27 Jul 2017 16:07:03 -0400 Subject: [PATCH 18/18] added rules to build two versions of sra-sort for SEQUENCE_BEFORE_SECONDARY --- tools/sra-sort/Makefile | 76 ++++++++++++++++++++++++++++------------------ tools/sra-sort/csra-pair.h | 3 +- 2 files changed, 48 insertions(+), 31 deletions(-) diff --git a/tools/sra-sort/Makefile b/tools/sra-sort/Makefile index 7cfe85be..3f20cdf4 100644 --- a/tools/sra-sort/Makefile +++ b/tools/sra-sort/Makefile @@ -36,7 +36,7 @@ INT_TOOLS = \ EXT_TOOLS = \ ifneq (win,$(OS)) -EXT_TOOLS += sra-sort +EXT_TOOLS += sra-sort sra-sort-cg endif ALL_TOOLS = \ @@ -65,40 +65,53 @@ clean: stdclean .PHONY: clean #------------------------------------------------------------------------------- +# special rules for enabling a define +# +%.cg.$(OBJX): %.c + $(CC) -o $@ -DSEQUENCE_BEFORE_SECONDARY=0 $< $(LOC_INFO) -D__file_ext__=c + 
+#------------------------------------------------------------------------------- # sra-sort # -SRA_SORT_SRC = \ - caps \ - mem \ - membank \ - paged-membank \ - paged-mmapbank \ - except \ - idx-mapping \ - map-file \ - col-pair \ - row-set \ - simple-row-set \ - mapping-row-set \ - sorting-row-set \ - meta-pair \ - dir-pair \ - tbl-pair \ - db-pair \ - glob-poslen \ - poslen-col-pair \ - ref-alignid-col \ - buff-writer \ - id-mapper-col \ +SRA_SORT_SRC = \ + caps \ + mem \ + membank \ + paged-membank \ + paged-mmapbank \ + except \ + idx-mapping \ + map-file \ + col-pair \ + row-set \ + simple-row-set \ + mapping-row-set \ + sorting-row-set \ + meta-pair \ + dir-pair \ + tbl-pair \ + db-pair \ + glob-poslen \ + poslen-col-pair \ + ref-alignid-col \ + buff-writer \ + id-mapper-col \ capture-first-half-aligned \ - csra-tbl \ - csra-pair \ - run \ - sra-sort \ + run \ + sra-sort \ xcheck-ref-align -SRA_SORT_OBJ = \ - $(addsuffix .$(OBJX),$(SRA_SORT_SRC)) +SRA_SORT_SPECIAL_SRC = \ + csra-tbl \ + csra-pair + +SRA_SORT_OBJ = \ + $(addsuffix .$(OBJX),$(SRA_SORT_SRC)) \ + $(addsuffix .$(OBJX),$(SRA_SORT_SPECIAL_SRC)) + +SRA_SORT_CG_OBJ = \ + $(addsuffix .$(OBJX),$(SRA_SORT_SRC)) \ + $(addsuffix .cg.$(OBJX),$(SRA_SORT_SPECIAL_SRC)) SRA_SORT_LIB = \ -lkapp \ @@ -109,6 +122,9 @@ SRA_SORT_LIB = \ $(BINDIR)/sra-sort: $(SRA_SORT_OBJ) $(LD) --exe --vers $(SRCDIR)/../../shared/toolkit.vers -o $@ $^ $(SRA_SORT_LIB) +$(BINDIR)/sra-sort-cg: $(SRA_SORT_CG_OBJ) + $(LD) --exe --vers $(SRCDIR)/../../shared/toolkit.vers -o $@ $^ $(SRA_SORT_LIB) + #------------------------------------------------------------------------------- # dump-blob-boundaries # diff --git a/tools/sra-sort/csra-pair.h b/tools/sra-sort/csra-pair.h index 48791d03..de7b7bb3 100644 --- a/tools/sra-sort/csra-pair.h +++ b/tools/sra-sort/csra-pair.h @@ -32,8 +32,9 @@ #endif +#ifndef SEQUENCE_BEFORE_SECONDARY #define SEQUENCE_BEFORE_SECONDARY 1 - +#endif 
/*-------------------------------------------------------------------------- * forwards