diff --git a/htslib/synced_bcf_reader.h b/htslib/synced_bcf_reader.h index a81ac371d..e0455501c 100644 --- a/htslib/synced_bcf_reader.h +++ b/htslib/synced_bcf_reader.h @@ -99,7 +99,8 @@ typedef enum BCF_SR_PAIR_LOGIC, // combination of the PAIR_* values above BCF_SR_ALLOW_NO_IDX, // allow to proceed even if required index is not present (at the user's risk) BCF_SR_REGIONS_OVERLAP, // include overlapping records with POS outside the regions: 0=no, 1=VCF line overlap, 2=true variant overlap [1] - BCF_SR_TARGETS_OVERLAP // include overlapping records with POS outside the targets: 0=no, 1=VCF line overlap, 2=true variant overlap [0] + BCF_SR_TARGETS_OVERLAP, // include overlapping records with POS outside the targets: 0=no, 1=VCF line overlap, 2=true variant overlap [0] + BCF_SR_AUTO_TARGETS_FROM_REGIONS // route a dense single-base regions file through the streaming-targets code path; sets readers->targets, so incompatible with bcf_sr_set_targets() [off] } bcf_sr_opt_t; diff --git a/synced_bcf_reader.c b/synced_bcf_reader.c index 7769132fa..e2df73556 100644 --- a/synced_bcf_reader.c +++ b/synced_bcf_reader.c @@ -39,6 +39,7 @@ DEALINGS IN THE SOFTWARE. */ #include "htslib/kseq.h" #include "htslib/khash_str2int.h" #include "htslib/bgzf.h" +#include "htslib/hfile.h" #include "htslib/thread_pool.h" #include "bcf_sr_sort.h" @@ -70,6 +71,7 @@ typedef struct sr_sort_t sort; int regions_overlap, targets_overlap; int *closefile; // close htsfile with sync reader close or not + int auto_targets_from_regions; // BCF_SR_AUTO_TARGETS_FROM_REGIONS opt-in } aux_t; @@ -141,6 +143,10 @@ int bcf_sr_set_opt(bcf_srs_t *readers, bcf_sr_opt_t opt, ...) 
if ( readers->targets ) readers->targets->overlap = BCF_SR_AUX(readers)->targets_overlap; return 0; + case BCF_SR_AUTO_TARGETS_FROM_REGIONS: + BCF_SR_AUX(readers)->auto_targets_from_regions = 1; + return 0; + default: break; } @@ -187,6 +193,109 @@ static int *init_filters(bcf_hdr_t *hdr, const char *filters, int *nfilters) return NULL; }
+// Decide whether a regions file should be auto-promoted to the streaming-
+// targets path (samtools/bcftools#2557). Returns 1 iff all of these hold:
+//   (a) fname is a regular local file, so closing it here and re-opening
+//       it later loses no data;
+//   (b) the first BCF_SR_REGIONS_SNIFF_LINES data lines are all well-formed
+//       single-base BED records (CHROM\tN-1\tN);
+//   (c) the mean distance between consecutive same-chromosome entries in
+//       that sample does not exceed BCF_SR_REGIONS_DENSITY_BP.
+// Returns 0 in every other case, including open or allocation failure, so
+// the caller stays on the default per-region path.
+//
+// Motivation: the per-region `tbx_itr_queryi()` path was measured 300-500x
+// slower than a sequential scan at SNP-panel sizes (84M entries against a
+// 23GB VCF did not complete in 11+ hours of 100% CPU in production).
+//
+// The density gate rejects sparse BEDs (e.g. ~11 SNPs per chromosome), where
+// streaming would scan a whole chromosome to satisfy a handful of records
+// and the tabix seek path remains the cheap one.
+//
+// NOTE(review): only the BED convention (END-START==1) is recognised as
+// single-base; confirm that is intended for non-BED tab-delimited files.
+#define BCF_SR_REGIONS_SNIFF_LINES 256
+#define BCF_SR_REGIONS_DENSITY_BP 10000
+static int sniff_regions_singlebase(const char *fname)
+{
+    // bcf_sr_set_targets() reads a leading '^' as "complement of the list",
+    // which regions never mean, so such a name must not be promoted.
+    if ( !fname || fname[0]=='^' ) return 0;
+
+    // Bail on inputs whose bytes are gone after the sniff close: FIFOs,
+    // stdin, character devices. A later hts_open() would see a truncated
+    // stream (or block). Remote URLs can be re-opened, but the extra
+    // round-trip outweighs the value of the heuristic: local files only.
+    if ( hisremote(fname) ) return 0;
+    if ( fname[0]=='-' && fname[1]==0 ) return 0;
+    struct stat sb;
+    if ( stat(fname, &sb) != 0 ) return 0;
+    if ( !S_ISREG(sb.st_mode) ) return 0;
+
+    htsFile *fp = hts_open(fname, "r");
+    if (!fp) return 0;
+    // One gap this large already pushes the mean over the limit, so longer
+    // gaps are clamped to it; this keeps the running sum from overflowing.
+    const hts_pos_t max_gap =
+        (hts_pos_t) BCF_SR_REGIONS_DENSITY_BP * BCF_SR_REGIONS_SNIFF_LINES;
+    kstring_t line = {0,0,0};
+    kstring_t prev_chr = {0,0,0};
+    hts_pos_t prev_pos = -1;    // -1 until the first entry has been stored
+    hts_pos_t total_intra_dist = 0;
+    int n = 0, n_intra = 0, all_singlebase = 1;
+    while (n < BCF_SR_REGIONS_SNIFF_LINES)
+    {
+        int ret = hts_getline(fp, KS_SEP_LINE, &line);
+        if (ret < 0) break; // EOF or read error
+        if (line.l == 0 || line.s[0] == '#') continue; // skip headers/comments
+        // Parse CHROM\tSTART\tEND[\t...]. BED 1-bp regions have END-START==1.
+        char *chr = line.s;
+        char *p = line.s;
+        while (*p && *p != '\t') p++;
+        if (p == chr || *p != '\t') { all_singlebase = 0; break; }
+        size_t chr_len = p - chr;
+        // Each coordinate must start with a digit (strtoll() alone would
+        // skip blanks and accept a sign) and must fill its whole column.
+        char *start_s = ++p, *tail;
+        if (*start_s < '0' || *start_s > '9') { all_singlebase = 0; break; }
+        hts_pos_t start = strtoll(start_s, &tail, 10);
+        if (*tail != '\t') { all_singlebase = 0; break; }
+        char *end_s = tail + 1;
+        if (*end_s < '0' || *end_s > '9') { all_singlebase = 0; break; }
+        hts_pos_t end = strtoll(end_s, &tail, 10);
+        if (*tail && *tail != '\t') { all_singlebase = 0; break; }
+        // end > start is tested first so the subtraction cannot overflow.
+        if (end <= start || end - start != 1) { all_singlebase = 0; break; }
+
+        if ( prev_pos >= 0 && prev_chr.l == chr_len &&
+             memcmp(prev_chr.s, chr, chr_len) == 0 )
+        {
+            hts_pos_t d = start - prev_pos;
+            if (d < 0) d = -d;
+            if (d > max_gap) d = max_gap;
+            total_intra_dist += d;
+            n_intra++;
+        }
+        prev_chr.l = 0;
+        // Out of memory: give up on promotion instead of comparing against
+        // a chromosome name that was never stored.
+        if ( kputsn(chr, chr_len, &prev_chr) < 0 ) { all_singlebase = 0; break; }
+        prev_pos = start;
+        n++;
+    }
+    free(line.s);
+    free(prev_chr.s);
+    if (hts_close(fp) < 0)
+        hts_log_error("Error on closing %s", fname);
+    if ( !all_singlebase || n != BCF_SR_REGIONS_SNIFF_LINES ) return 0;
+    // Need at least one same-chrom comparison and the sample must be dense.
+    // (If every entry sits on a different chromosome, the panel is sparse
+    // by construction: reject.)
+    if ( n_intra == 0 ) return 0;
+    if ( total_intra_dist / n_intra > BCF_SR_REGIONS_DENSITY_BP ) return 0;
+    return 1;
+}
+
int bcf_sr_set_regions(bcf_srs_t *readers, const char *regions, int is_file) { if ( readers->nreaders || readers->regions ) @@ -197,6 +306,24 @@ int bcf_sr_set_regions(bcf_srs_t *readers, const char *regions, int is_file) return 0; } + // #2557 fastpath: a dense single-base BED (typical SNP panel: HGDP+1kGP + // 84M sites, AADR 1240k, PGS Catalog) hits a 300-500x per-region seek + // overhead in the default path. The streaming-targets code path + // (bcf_sr_set_targets) handles the same workload at near-baseline + // speed. Opt-in via BCF_SR_AUTO_TARGETS_FROM_REGIONS — when set, the + // sniffer decides whether the file qualifies. The opt-in is required + // because this routes regions through readers->targets, which is + // observable to callers and incompatible with a subsequent + // bcf_sr_set_targets() call. The 0/1/2 --regions-overlap semantics + // match --targets-overlap so the user-set value carries over unchanged.
+ if ( is_file + && BCF_SR_AUX(readers)->auto_targets_from_regions + && sniff_regions_singlebase(regions) ) + { + BCF_SR_AUX(readers)->targets_overlap = BCF_SR_AUX(readers)->regions_overlap; + return bcf_sr_set_targets(readers, regions, is_file, 0); + } + readers->regions = bcf_sr_regions_init(regions,is_file,0,1,-2); if ( !readers->regions ) return -1; readers->explicit_regs = 1; diff --git a/test/bcf-sr/regions-fastpath.bed b/test/bcf-sr/regions-fastpath.bed new file mode 100644 index 000000000..628e24d4c --- /dev/null +++ b/test/bcf-sr/regions-fastpath.bed @@ -0,0 +1,300 @@ +1 99 100 +1 199 200 +1 299 300 +1 399 400 +1 499 500 +1 599 600 +1 699 700 +1 799 800 +1 899 900 +1 999 1000 +1 1099 1100 +1 1199 1200 +1 1299 1300 +1 1399 1400 +1 1499 1500 +1 1599 1600 +1 1699 1700 +1 1799 1800 +1 1899 1900 +1 1999 2000 +1 2099 2100 +1 2199 2200 +1 2299 2300 +1 2399 2400 +1 2499 2500 +1 2599 2600 +1 2699 2700 +1 2799 2800 +1 2899 2900 +1 2999 3000 +1 3099 3100 +1 3199 3200 +1 3299 3300 +1 3399 3400 +1 3499 3500 +1 3599 3600 +1 3699 3700 +1 3799 3800 +1 3899 3900 +1 3999 4000 +1 4099 4100 +1 4199 4200 +1 4299 4300 +1 4399 4400 +1 4499 4500 +1 4599 4600 +1 4699 4700 +1 4799 4800 +1 4899 4900 +1 4999 5000 +1 5099 5100 +1 5199 5200 +1 5299 5300 +1 5399 5400 +1 5499 5500 +1 5599 5600 +1 5699 5700 +1 5799 5800 +1 5899 5900 +1 5999 6000 +1 6099 6100 +1 6199 6200 +1 6299 6300 +1 6399 6400 +1 6499 6500 +1 6599 6600 +1 6699 6700 +1 6799 6800 +1 6899 6900 +1 6999 7000 +1 7099 7100 +1 7199 7200 +1 7299 7300 +1 7399 7400 +1 7499 7500 +1 7599 7600 +1 7699 7700 +1 7799 7800 +1 7899 7900 +1 7999 8000 +1 8099 8100 +1 8199 8200 +1 8299 8300 +1 8399 8400 +1 8499 8500 +1 8599 8600 +1 8699 8700 +1 8799 8800 +1 8899 8900 +1 8999 9000 +1 9099 9100 +1 9199 9200 +1 9299 9300 +1 9399 9400 +1 9499 9500 +1 9599 9600 +1 9699 9700 +1 9799 9800 +1 9899 9900 +1 9999 10000 +2 99 100 +2 199 200 +2 299 300 +2 399 400 +2 499 500 +2 599 600 +2 699 700 +2 799 800 +2 899 900 +2 999 1000 +2 1099 1100 +2 1199 
1200 +2 1299 1300 +2 1399 1400 +2 1499 1500 +2 1599 1600 +2 1699 1700 +2 1799 1800 +2 1899 1900 +2 1999 2000 +2 2099 2100 +2 2199 2200 +2 2299 2300 +2 2399 2400 +2 2499 2500 +2 2599 2600 +2 2699 2700 +2 2799 2800 +2 2899 2900 +2 2999 3000 +2 3099 3100 +2 3199 3200 +2 3299 3300 +2 3399 3400 +2 3499 3500 +2 3599 3600 +2 3699 3700 +2 3799 3800 +2 3899 3900 +2 3999 4000 +2 4099 4100 +2 4199 4200 +2 4299 4300 +2 4399 4400 +2 4499 4500 +2 4599 4600 +2 4699 4700 +2 4799 4800 +2 4899 4900 +2 4999 5000 +2 5099 5100 +2 5199 5200 +2 5299 5300 +2 5399 5400 +2 5499 5500 +2 5599 5600 +2 5699 5700 +2 5799 5800 +2 5899 5900 +2 5999 6000 +2 6099 6100 +2 6199 6200 +2 6299 6300 +2 6399 6400 +2 6499 6500 +2 6599 6600 +2 6699 6700 +2 6799 6800 +2 6899 6900 +2 6999 7000 +2 7099 7100 +2 7199 7200 +2 7299 7300 +2 7399 7400 +2 7499 7500 +2 7599 7600 +2 7699 7700 +2 7799 7800 +2 7899 7900 +2 7999 8000 +2 8099 8100 +2 8199 8200 +2 8299 8300 +2 8399 8400 +2 8499 8500 +2 8599 8600 +2 8699 8700 +2 8799 8800 +2 8899 8900 +2 8999 9000 +2 9099 9100 +2 9199 9200 +2 9299 9300 +2 9399 9400 +2 9499 9500 +2 9599 9600 +2 9699 9700 +2 9799 9800 +2 9899 9900 +2 9999 10000 +3 99 100 +3 199 200 +3 299 300 +3 399 400 +3 499 500 +3 599 600 +3 699 700 +3 799 800 +3 899 900 +3 999 1000 +3 1099 1100 +3 1199 1200 +3 1299 1300 +3 1399 1400 +3 1499 1500 +3 1599 1600 +3 1699 1700 +3 1799 1800 +3 1899 1900 +3 1999 2000 +3 2099 2100 +3 2199 2200 +3 2299 2300 +3 2399 2400 +3 2499 2500 +3 2599 2600 +3 2699 2700 +3 2799 2800 +3 2899 2900 +3 2999 3000 +3 3099 3100 +3 3199 3200 +3 3299 3300 +3 3399 3400 +3 3499 3500 +3 3599 3600 +3 3699 3700 +3 3799 3800 +3 3899 3900 +3 3999 4000 +3 4099 4100 +3 4199 4200 +3 4299 4300 +3 4399 4400 +3 4499 4500 +3 4599 4600 +3 4699 4700 +3 4799 4800 +3 4899 4900 +3 4999 5000 +3 5099 5100 +3 5199 5200 +3 5299 5300 +3 5399 5400 +3 5499 5500 +3 5599 5600 +3 5699 5700 +3 5799 5800 +3 5899 5900 +3 5999 6000 +3 6099 6100 +3 6199 6200 +3 6299 6300 +3 6399 6400 +3 6499 6500 +3 6599 6600 +3 6699 
6700 +3 6799 6800 +3 6899 6900 +3 6999 7000 +3 7099 7100 +3 7199 7200 +3 7299 7300 +3 7399 7400 +3 7499 7500 +3 7599 7600 +3 7699 7700 +3 7799 7800 +3 7899 7900 +3 7999 8000 +3 8099 8100 +3 8199 8200 +3 8299 8300 +3 8399 8400 +3 8499 8500 +3 8599 8600 +3 8699 8700 +3 8799 8800 +3 8899 8900 +3 8999 9000 +3 9099 9100 +3 9199 9200 +3 9299 9300 +3 9399 9400 +3 9499 9500 +3 9599 9600 +3 9699 9700 +3 9799 9800 +3 9899 9900 +3 9999 10000 diff --git a/test/bcf-sr/regions-fastpath.vcf b/test/bcf-sr/regions-fastpath.vcf new file mode 100644 index 000000000..39f2b044a --- /dev/null +++ b/test/bcf-sr/regions-fastpath.vcf @@ -0,0 +1,606 @@ +##fileformat=VCFv4.2 +##contig= +##contig= +##contig= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 +1 100 . A G 100 PASS . GT 0/1 +1 150 . A G 100 PASS . GT 0/1 +1 200 . A G 100 PASS . GT 0/1 +1 250 . A G 100 PASS . GT 0/1 +1 300 . A G 100 PASS . GT 0/1 +1 350 . A G 100 PASS . GT 0/1 +1 400 . A G 100 PASS . GT 0/1 +1 450 . A G 100 PASS . GT 0/1 +1 500 . A G 100 PASS . GT 0/1 +1 550 . A G 100 PASS . GT 0/1 +1 600 . A G 100 PASS . GT 0/1 +1 650 . A G 100 PASS . GT 0/1 +1 700 . A G 100 PASS . GT 0/1 +1 750 . A G 100 PASS . GT 0/1 +1 800 . A G 100 PASS . GT 0/1 +1 850 . A G 100 PASS . GT 0/1 +1 900 . A G 100 PASS . GT 0/1 +1 950 . A G 100 PASS . GT 0/1 +1 1000 . A G 100 PASS . GT 0/1 +1 1050 . A G 100 PASS . GT 0/1 +1 1100 . A G 100 PASS . GT 0/1 +1 1150 . A G 100 PASS . GT 0/1 +1 1200 . A G 100 PASS . GT 0/1 +1 1250 . A G 100 PASS . GT 0/1 +1 1300 . A G 100 PASS . GT 0/1 +1 1350 . A G 100 PASS . GT 0/1 +1 1400 . A G 100 PASS . GT 0/1 +1 1450 . A G 100 PASS . GT 0/1 +1 1500 . A G 100 PASS . GT 0/1 +1 1550 . A G 100 PASS . GT 0/1 +1 1600 . A G 100 PASS . GT 0/1 +1 1650 . A G 100 PASS . GT 0/1 +1 1700 . A G 100 PASS . GT 0/1 +1 1750 . A G 100 PASS . GT 0/1 +1 1800 . A G 100 PASS . GT 0/1 +1 1850 . A G 100 PASS . GT 0/1 +1 1900 . A G 100 PASS . GT 0/1 +1 1950 . A G 100 PASS . GT 0/1 +1 2000 . A G 100 PASS . GT 0/1 +1 2050 . 
A G 100 PASS . GT 0/1 +1 2100 . A G 100 PASS . GT 0/1 +1 2150 . A G 100 PASS . GT 0/1 +1 2200 . A G 100 PASS . GT 0/1 +1 2250 . A G 100 PASS . GT 0/1 +1 2300 . A G 100 PASS . GT 0/1 +1 2350 . A G 100 PASS . GT 0/1 +1 2400 . A G 100 PASS . GT 0/1 +1 2450 . A G 100 PASS . GT 0/1 +1 2500 . A G 100 PASS . GT 0/1 +1 2550 . A G 100 PASS . GT 0/1 +1 2600 . A G 100 PASS . GT 0/1 +1 2650 . A G 100 PASS . GT 0/1 +1 2700 . A G 100 PASS . GT 0/1 +1 2750 . A G 100 PASS . GT 0/1 +1 2800 . A G 100 PASS . GT 0/1 +1 2850 . A G 100 PASS . GT 0/1 +1 2900 . A G 100 PASS . GT 0/1 +1 2950 . A G 100 PASS . GT 0/1 +1 3000 . A G 100 PASS . GT 0/1 +1 3050 . A G 100 PASS . GT 0/1 +1 3100 . A G 100 PASS . GT 0/1 +1 3150 . A G 100 PASS . GT 0/1 +1 3200 . A G 100 PASS . GT 0/1 +1 3250 . A G 100 PASS . GT 0/1 +1 3300 . A G 100 PASS . GT 0/1 +1 3350 . A G 100 PASS . GT 0/1 +1 3400 . A G 100 PASS . GT 0/1 +1 3450 . A G 100 PASS . GT 0/1 +1 3500 . A G 100 PASS . GT 0/1 +1 3550 . A G 100 PASS . GT 0/1 +1 3600 . A G 100 PASS . GT 0/1 +1 3650 . A G 100 PASS . GT 0/1 +1 3700 . A G 100 PASS . GT 0/1 +1 3750 . A G 100 PASS . GT 0/1 +1 3800 . A G 100 PASS . GT 0/1 +1 3850 . A G 100 PASS . GT 0/1 +1 3900 . A G 100 PASS . GT 0/1 +1 3950 . A G 100 PASS . GT 0/1 +1 4000 . A G 100 PASS . GT 0/1 +1 4050 . A G 100 PASS . GT 0/1 +1 4100 . A G 100 PASS . GT 0/1 +1 4150 . A G 100 PASS . GT 0/1 +1 4200 . A G 100 PASS . GT 0/1 +1 4250 . A G 100 PASS . GT 0/1 +1 4300 . A G 100 PASS . GT 0/1 +1 4350 . A G 100 PASS . GT 0/1 +1 4400 . A G 100 PASS . GT 0/1 +1 4450 . A G 100 PASS . GT 0/1 +1 4500 . A G 100 PASS . GT 0/1 +1 4550 . A G 100 PASS . GT 0/1 +1 4600 . A G 100 PASS . GT 0/1 +1 4650 . A G 100 PASS . GT 0/1 +1 4700 . A G 100 PASS . GT 0/1 +1 4750 . A G 100 PASS . GT 0/1 +1 4800 . A G 100 PASS . GT 0/1 +1 4850 . A G 100 PASS . GT 0/1 +1 4900 . A G 100 PASS . GT 0/1 +1 4950 . A G 100 PASS . GT 0/1 +1 5000 . A G 100 PASS . GT 0/1 +1 5050 . A G 100 PASS . GT 0/1 +1 5100 . A G 100 PASS . GT 0/1 +1 5150 . A G 100 PASS . 
GT 0/1 +1 5200 . A G 100 PASS . GT 0/1 +1 5250 . A G 100 PASS . GT 0/1 +1 5300 . A G 100 PASS . GT 0/1 +1 5350 . A G 100 PASS . GT 0/1 +1 5400 . A G 100 PASS . GT 0/1 +1 5450 . A G 100 PASS . GT 0/1 +1 5500 . A G 100 PASS . GT 0/1 +1 5550 . A G 100 PASS . GT 0/1 +1 5600 . A G 100 PASS . GT 0/1 +1 5650 . A G 100 PASS . GT 0/1 +1 5700 . A G 100 PASS . GT 0/1 +1 5750 . A G 100 PASS . GT 0/1 +1 5800 . A G 100 PASS . GT 0/1 +1 5850 . A G 100 PASS . GT 0/1 +1 5900 . A G 100 PASS . GT 0/1 +1 5950 . A G 100 PASS . GT 0/1 +1 6000 . A G 100 PASS . GT 0/1 +1 6050 . A G 100 PASS . GT 0/1 +1 6100 . A G 100 PASS . GT 0/1 +1 6150 . A G 100 PASS . GT 0/1 +1 6200 . A G 100 PASS . GT 0/1 +1 6250 . A G 100 PASS . GT 0/1 +1 6300 . A G 100 PASS . GT 0/1 +1 6350 . A G 100 PASS . GT 0/1 +1 6400 . A G 100 PASS . GT 0/1 +1 6450 . A G 100 PASS . GT 0/1 +1 6500 . A G 100 PASS . GT 0/1 +1 6550 . A G 100 PASS . GT 0/1 +1 6600 . A G 100 PASS . GT 0/1 +1 6650 . A G 100 PASS . GT 0/1 +1 6700 . A G 100 PASS . GT 0/1 +1 6750 . A G 100 PASS . GT 0/1 +1 6800 . A G 100 PASS . GT 0/1 +1 6850 . A G 100 PASS . GT 0/1 +1 6900 . A G 100 PASS . GT 0/1 +1 6950 . A G 100 PASS . GT 0/1 +1 7000 . A G 100 PASS . GT 0/1 +1 7050 . A G 100 PASS . GT 0/1 +1 7100 . A G 100 PASS . GT 0/1 +1 7150 . A G 100 PASS . GT 0/1 +1 7200 . A G 100 PASS . GT 0/1 +1 7250 . A G 100 PASS . GT 0/1 +1 7300 . A G 100 PASS . GT 0/1 +1 7350 . A G 100 PASS . GT 0/1 +1 7400 . A G 100 PASS . GT 0/1 +1 7450 . A G 100 PASS . GT 0/1 +1 7500 . A G 100 PASS . GT 0/1 +1 7550 . A G 100 PASS . GT 0/1 +1 7600 . A G 100 PASS . GT 0/1 +1 7650 . A G 100 PASS . GT 0/1 +1 7700 . A G 100 PASS . GT 0/1 +1 7750 . A G 100 PASS . GT 0/1 +1 7800 . A G 100 PASS . GT 0/1 +1 7850 . A G 100 PASS . GT 0/1 +1 7900 . A G 100 PASS . GT 0/1 +1 7950 . A G 100 PASS . GT 0/1 +1 8000 . A G 100 PASS . GT 0/1 +1 8050 . A G 100 PASS . GT 0/1 +1 8100 . A G 100 PASS . GT 0/1 +1 8150 . A G 100 PASS . GT 0/1 +1 8200 . A G 100 PASS . GT 0/1 +1 8250 . A G 100 PASS . 
GT 0/1 +1 8300 . A G 100 PASS . GT 0/1 +1 8350 . A G 100 PASS . GT 0/1 +1 8400 . A G 100 PASS . GT 0/1 +1 8450 . A G 100 PASS . GT 0/1 +1 8500 . A G 100 PASS . GT 0/1 +1 8550 . A G 100 PASS . GT 0/1 +1 8600 . A G 100 PASS . GT 0/1 +1 8650 . A G 100 PASS . GT 0/1 +1 8700 . A G 100 PASS . GT 0/1 +1 8750 . A G 100 PASS . GT 0/1 +1 8800 . A G 100 PASS . GT 0/1 +1 8850 . A G 100 PASS . GT 0/1 +1 8900 . A G 100 PASS . GT 0/1 +1 8950 . A G 100 PASS . GT 0/1 +1 9000 . A G 100 PASS . GT 0/1 +1 9050 . A G 100 PASS . GT 0/1 +1 9100 . A G 100 PASS . GT 0/1 +1 9150 . A G 100 PASS . GT 0/1 +1 9200 . A G 100 PASS . GT 0/1 +1 9250 . A G 100 PASS . GT 0/1 +1 9300 . A G 100 PASS . GT 0/1 +1 9350 . A G 100 PASS . GT 0/1 +1 9400 . A G 100 PASS . GT 0/1 +1 9450 . A G 100 PASS . GT 0/1 +1 9500 . A G 100 PASS . GT 0/1 +1 9550 . A G 100 PASS . GT 0/1 +1 9600 . A G 100 PASS . GT 0/1 +1 9650 . A G 100 PASS . GT 0/1 +1 9700 . A G 100 PASS . GT 0/1 +1 9750 . A G 100 PASS . GT 0/1 +1 9800 . A G 100 PASS . GT 0/1 +1 9850 . A G 100 PASS . GT 0/1 +1 9900 . A G 100 PASS . GT 0/1 +1 9950 . A G 100 PASS . GT 0/1 +1 10000 . A G 100 PASS . GT 0/1 +1 10050 . A G 100 PASS . GT 0/1 +2 100 . A G 100 PASS . GT 0/1 +2 150 . A G 100 PASS . GT 0/1 +2 200 . A G 100 PASS . GT 0/1 +2 250 . A G 100 PASS . GT 0/1 +2 300 . A G 100 PASS . GT 0/1 +2 350 . A G 100 PASS . GT 0/1 +2 400 . A G 100 PASS . GT 0/1 +2 450 . A G 100 PASS . GT 0/1 +2 500 . A G 100 PASS . GT 0/1 +2 550 . A G 100 PASS . GT 0/1 +2 600 . A G 100 PASS . GT 0/1 +2 650 . A G 100 PASS . GT 0/1 +2 700 . A G 100 PASS . GT 0/1 +2 750 . A G 100 PASS . GT 0/1 +2 800 . A G 100 PASS . GT 0/1 +2 850 . A G 100 PASS . GT 0/1 +2 900 . A G 100 PASS . GT 0/1 +2 950 . A G 100 PASS . GT 0/1 +2 1000 . A G 100 PASS . GT 0/1 +2 1050 . A G 100 PASS . GT 0/1 +2 1100 . A G 100 PASS . GT 0/1 +2 1150 . A G 100 PASS . GT 0/1 +2 1200 . A G 100 PASS . GT 0/1 +2 1250 . A G 100 PASS . GT 0/1 +2 1300 . A G 100 PASS . GT 0/1 +2 1350 . A G 100 PASS . GT 0/1 +2 1400 . A G 100 PASS . 
GT 0/1 +2 1450 . A G 100 PASS . GT 0/1 +2 1500 . A G 100 PASS . GT 0/1 +2 1550 . A G 100 PASS . GT 0/1 +2 1600 . A G 100 PASS . GT 0/1 +2 1650 . A G 100 PASS . GT 0/1 +2 1700 . A G 100 PASS . GT 0/1 +2 1750 . A G 100 PASS . GT 0/1 +2 1800 . A G 100 PASS . GT 0/1 +2 1850 . A G 100 PASS . GT 0/1 +2 1900 . A G 100 PASS . GT 0/1 +2 1950 . A G 100 PASS . GT 0/1 +2 2000 . A G 100 PASS . GT 0/1 +2 2050 . A G 100 PASS . GT 0/1 +2 2100 . A G 100 PASS . GT 0/1 +2 2150 . A G 100 PASS . GT 0/1 +2 2200 . A G 100 PASS . GT 0/1 +2 2250 . A G 100 PASS . GT 0/1 +2 2300 . A G 100 PASS . GT 0/1 +2 2350 . A G 100 PASS . GT 0/1 +2 2400 . A G 100 PASS . GT 0/1 +2 2450 . A G 100 PASS . GT 0/1 +2 2500 . A G 100 PASS . GT 0/1 +2 2550 . A G 100 PASS . GT 0/1 +2 2600 . A G 100 PASS . GT 0/1 +2 2650 . A G 100 PASS . GT 0/1 +2 2700 . A G 100 PASS . GT 0/1 +2 2750 . A G 100 PASS . GT 0/1 +2 2800 . A G 100 PASS . GT 0/1 +2 2850 . A G 100 PASS . GT 0/1 +2 2900 . A G 100 PASS . GT 0/1 +2 2950 . A G 100 PASS . GT 0/1 +2 3000 . A G 100 PASS . GT 0/1 +2 3050 . A G 100 PASS . GT 0/1 +2 3100 . A G 100 PASS . GT 0/1 +2 3150 . A G 100 PASS . GT 0/1 +2 3200 . A G 100 PASS . GT 0/1 +2 3250 . A G 100 PASS . GT 0/1 +2 3300 . A G 100 PASS . GT 0/1 +2 3350 . A G 100 PASS . GT 0/1 +2 3400 . A G 100 PASS . GT 0/1 +2 3450 . A G 100 PASS . GT 0/1 +2 3500 . A G 100 PASS . GT 0/1 +2 3550 . A G 100 PASS . GT 0/1 +2 3600 . A G 100 PASS . GT 0/1 +2 3650 . A G 100 PASS . GT 0/1 +2 3700 . A G 100 PASS . GT 0/1 +2 3750 . A G 100 PASS . GT 0/1 +2 3800 . A G 100 PASS . GT 0/1 +2 3850 . A G 100 PASS . GT 0/1 +2 3900 . A G 100 PASS . GT 0/1 +2 3950 . A G 100 PASS . GT 0/1 +2 4000 . A G 100 PASS . GT 0/1 +2 4050 . A G 100 PASS . GT 0/1 +2 4100 . A G 100 PASS . GT 0/1 +2 4150 . A G 100 PASS . GT 0/1 +2 4200 . A G 100 PASS . GT 0/1 +2 4250 . A G 100 PASS . GT 0/1 +2 4300 . A G 100 PASS . GT 0/1 +2 4350 . A G 100 PASS . GT 0/1 +2 4400 . A G 100 PASS . GT 0/1 +2 4450 . A G 100 PASS . GT 0/1 +2 4500 . A G 100 PASS . 
GT 0/1 +2 4550 . A G 100 PASS . GT 0/1 +2 4600 . A G 100 PASS . GT 0/1 +2 4650 . A G 100 PASS . GT 0/1 +2 4700 . A G 100 PASS . GT 0/1 +2 4750 . A G 100 PASS . GT 0/1 +2 4800 . A G 100 PASS . GT 0/1 +2 4850 . A G 100 PASS . GT 0/1 +2 4900 . A G 100 PASS . GT 0/1 +2 4950 . A G 100 PASS . GT 0/1 +2 5000 . A G 100 PASS . GT 0/1 +2 5050 . A G 100 PASS . GT 0/1 +2 5100 . A G 100 PASS . GT 0/1 +2 5150 . A G 100 PASS . GT 0/1 +2 5200 . A G 100 PASS . GT 0/1 +2 5250 . A G 100 PASS . GT 0/1 +2 5300 . A G 100 PASS . GT 0/1 +2 5350 . A G 100 PASS . GT 0/1 +2 5400 . A G 100 PASS . GT 0/1 +2 5450 . A G 100 PASS . GT 0/1 +2 5500 . A G 100 PASS . GT 0/1 +2 5550 . A G 100 PASS . GT 0/1 +2 5600 . A G 100 PASS . GT 0/1 +2 5650 . A G 100 PASS . GT 0/1 +2 5700 . A G 100 PASS . GT 0/1 +2 5750 . A G 100 PASS . GT 0/1 +2 5800 . A G 100 PASS . GT 0/1 +2 5850 . A G 100 PASS . GT 0/1 +2 5900 . A G 100 PASS . GT 0/1 +2 5950 . A G 100 PASS . GT 0/1 +2 6000 . A G 100 PASS . GT 0/1 +2 6050 . A G 100 PASS . GT 0/1 +2 6100 . A G 100 PASS . GT 0/1 +2 6150 . A G 100 PASS . GT 0/1 +2 6200 . A G 100 PASS . GT 0/1 +2 6250 . A G 100 PASS . GT 0/1 +2 6300 . A G 100 PASS . GT 0/1 +2 6350 . A G 100 PASS . GT 0/1 +2 6400 . A G 100 PASS . GT 0/1 +2 6450 . A G 100 PASS . GT 0/1 +2 6500 . A G 100 PASS . GT 0/1 +2 6550 . A G 100 PASS . GT 0/1 +2 6600 . A G 100 PASS . GT 0/1 +2 6650 . A G 100 PASS . GT 0/1 +2 6700 . A G 100 PASS . GT 0/1 +2 6750 . A G 100 PASS . GT 0/1 +2 6800 . A G 100 PASS . GT 0/1 +2 6850 . A G 100 PASS . GT 0/1 +2 6900 . A G 100 PASS . GT 0/1 +2 6950 . A G 100 PASS . GT 0/1 +2 7000 . A G 100 PASS . GT 0/1 +2 7050 . A G 100 PASS . GT 0/1 +2 7100 . A G 100 PASS . GT 0/1 +2 7150 . A G 100 PASS . GT 0/1 +2 7200 . A G 100 PASS . GT 0/1 +2 7250 . A G 100 PASS . GT 0/1 +2 7300 . A G 100 PASS . GT 0/1 +2 7350 . A G 100 PASS . GT 0/1 +2 7400 . A G 100 PASS . GT 0/1 +2 7450 . A G 100 PASS . GT 0/1 +2 7500 . A G 100 PASS . GT 0/1 +2 7550 . A G 100 PASS . GT 0/1 +2 7600 . A G 100 PASS . 
GT 0/1 +2 7650 . A G 100 PASS . GT 0/1 +2 7700 . A G 100 PASS . GT 0/1 +2 7750 . A G 100 PASS . GT 0/1 +2 7800 . A G 100 PASS . GT 0/1 +2 7850 . A G 100 PASS . GT 0/1 +2 7900 . A G 100 PASS . GT 0/1 +2 7950 . A G 100 PASS . GT 0/1 +2 8000 . A G 100 PASS . GT 0/1 +2 8050 . A G 100 PASS . GT 0/1 +2 8100 . A G 100 PASS . GT 0/1 +2 8150 . A G 100 PASS . GT 0/1 +2 8200 . A G 100 PASS . GT 0/1 +2 8250 . A G 100 PASS . GT 0/1 +2 8300 . A G 100 PASS . GT 0/1 +2 8350 . A G 100 PASS . GT 0/1 +2 8400 . A G 100 PASS . GT 0/1 +2 8450 . A G 100 PASS . GT 0/1 +2 8500 . A G 100 PASS . GT 0/1 +2 8550 . A G 100 PASS . GT 0/1 +2 8600 . A G 100 PASS . GT 0/1 +2 8650 . A G 100 PASS . GT 0/1 +2 8700 . A G 100 PASS . GT 0/1 +2 8750 . A G 100 PASS . GT 0/1 +2 8800 . A G 100 PASS . GT 0/1 +2 8850 . A G 100 PASS . GT 0/1 +2 8900 . A G 100 PASS . GT 0/1 +2 8950 . A G 100 PASS . GT 0/1 +2 9000 . A G 100 PASS . GT 0/1 +2 9050 . A G 100 PASS . GT 0/1 +2 9100 . A G 100 PASS . GT 0/1 +2 9150 . A G 100 PASS . GT 0/1 +2 9200 . A G 100 PASS . GT 0/1 +2 9250 . A G 100 PASS . GT 0/1 +2 9300 . A G 100 PASS . GT 0/1 +2 9350 . A G 100 PASS . GT 0/1 +2 9400 . A G 100 PASS . GT 0/1 +2 9450 . A G 100 PASS . GT 0/1 +2 9500 . A G 100 PASS . GT 0/1 +2 9550 . A G 100 PASS . GT 0/1 +2 9600 . A G 100 PASS . GT 0/1 +2 9650 . A G 100 PASS . GT 0/1 +2 9700 . A G 100 PASS . GT 0/1 +2 9750 . A G 100 PASS . GT 0/1 +2 9800 . A G 100 PASS . GT 0/1 +2 9850 . A G 100 PASS . GT 0/1 +2 9900 . A G 100 PASS . GT 0/1 +2 9950 . A G 100 PASS . GT 0/1 +2 10000 . A G 100 PASS . GT 0/1 +2 10050 . A G 100 PASS . GT 0/1 +3 100 . A G 100 PASS . GT 0/1 +3 150 . A G 100 PASS . GT 0/1 +3 200 . A G 100 PASS . GT 0/1 +3 250 . A G 100 PASS . GT 0/1 +3 300 . A G 100 PASS . GT 0/1 +3 350 . A G 100 PASS . GT 0/1 +3 400 . A G 100 PASS . GT 0/1 +3 450 . A G 100 PASS . GT 0/1 +3 500 . A G 100 PASS . GT 0/1 +3 550 . A G 100 PASS . GT 0/1 +3 600 . A G 100 PASS . GT 0/1 +3 650 . A G 100 PASS . GT 0/1 +3 700 . A G 100 PASS . GT 0/1 +3 750 . 
A G 100 PASS . GT 0/1 +3 800 . A G 100 PASS . GT 0/1 +3 850 . A G 100 PASS . GT 0/1 +3 900 . A G 100 PASS . GT 0/1 +3 950 . A G 100 PASS . GT 0/1 +3 1000 . A G 100 PASS . GT 0/1 +3 1050 . A G 100 PASS . GT 0/1 +3 1100 . A G 100 PASS . GT 0/1 +3 1150 . A G 100 PASS . GT 0/1 +3 1200 . A G 100 PASS . GT 0/1 +3 1250 . A G 100 PASS . GT 0/1 +3 1300 . A G 100 PASS . GT 0/1 +3 1350 . A G 100 PASS . GT 0/1 +3 1400 . A G 100 PASS . GT 0/1 +3 1450 . A G 100 PASS . GT 0/1 +3 1500 . A G 100 PASS . GT 0/1 +3 1550 . A G 100 PASS . GT 0/1 +3 1600 . A G 100 PASS . GT 0/1 +3 1650 . A G 100 PASS . GT 0/1 +3 1700 . A G 100 PASS . GT 0/1 +3 1750 . A G 100 PASS . GT 0/1 +3 1800 . A G 100 PASS . GT 0/1 +3 1850 . A G 100 PASS . GT 0/1 +3 1900 . A G 100 PASS . GT 0/1 +3 1950 . A G 100 PASS . GT 0/1 +3 2000 . A G 100 PASS . GT 0/1 +3 2050 . A G 100 PASS . GT 0/1 +3 2100 . A G 100 PASS . GT 0/1 +3 2150 . A G 100 PASS . GT 0/1 +3 2200 . A G 100 PASS . GT 0/1 +3 2250 . A G 100 PASS . GT 0/1 +3 2300 . A G 100 PASS . GT 0/1 +3 2350 . A G 100 PASS . GT 0/1 +3 2400 . A G 100 PASS . GT 0/1 +3 2450 . A G 100 PASS . GT 0/1 +3 2500 . A G 100 PASS . GT 0/1 +3 2550 . A G 100 PASS . GT 0/1 +3 2600 . A G 100 PASS . GT 0/1 +3 2650 . A G 100 PASS . GT 0/1 +3 2700 . A G 100 PASS . GT 0/1 +3 2750 . A G 100 PASS . GT 0/1 +3 2800 . A G 100 PASS . GT 0/1 +3 2850 . A G 100 PASS . GT 0/1 +3 2900 . A G 100 PASS . GT 0/1 +3 2950 . A G 100 PASS . GT 0/1 +3 3000 . A G 100 PASS . GT 0/1 +3 3050 . A G 100 PASS . GT 0/1 +3 3100 . A G 100 PASS . GT 0/1 +3 3150 . A G 100 PASS . GT 0/1 +3 3200 . A G 100 PASS . GT 0/1 +3 3250 . A G 100 PASS . GT 0/1 +3 3300 . A G 100 PASS . GT 0/1 +3 3350 . A G 100 PASS . GT 0/1 +3 3400 . A G 100 PASS . GT 0/1 +3 3450 . A G 100 PASS . GT 0/1 +3 3500 . A G 100 PASS . GT 0/1 +3 3550 . A G 100 PASS . GT 0/1 +3 3600 . A G 100 PASS . GT 0/1 +3 3650 . A G 100 PASS . GT 0/1 +3 3700 . A G 100 PASS . GT 0/1 +3 3750 . A G 100 PASS . GT 0/1 +3 3800 . A G 100 PASS . GT 0/1 +3 3850 . A G 100 PASS . 
GT 0/1 +3 3900 . A G 100 PASS . GT 0/1 +3 3950 . A G 100 PASS . GT 0/1 +3 4000 . A G 100 PASS . GT 0/1 +3 4050 . A G 100 PASS . GT 0/1 +3 4100 . A G 100 PASS . GT 0/1 +3 4150 . A G 100 PASS . GT 0/1 +3 4200 . A G 100 PASS . GT 0/1 +3 4250 . A G 100 PASS . GT 0/1 +3 4300 . A G 100 PASS . GT 0/1 +3 4350 . A G 100 PASS . GT 0/1 +3 4400 . A G 100 PASS . GT 0/1 +3 4450 . A G 100 PASS . GT 0/1 +3 4500 . A G 100 PASS . GT 0/1 +3 4550 . A G 100 PASS . GT 0/1 +3 4600 . A G 100 PASS . GT 0/1 +3 4650 . A G 100 PASS . GT 0/1 +3 4700 . A G 100 PASS . GT 0/1 +3 4750 . A G 100 PASS . GT 0/1 +3 4800 . A G 100 PASS . GT 0/1 +3 4850 . A G 100 PASS . GT 0/1 +3 4900 . A G 100 PASS . GT 0/1 +3 4950 . A G 100 PASS . GT 0/1 +3 5000 . A G 100 PASS . GT 0/1 +3 5050 . A G 100 PASS . GT 0/1 +3 5100 . A G 100 PASS . GT 0/1 +3 5150 . A G 100 PASS . GT 0/1 +3 5200 . A G 100 PASS . GT 0/1 +3 5250 . A G 100 PASS . GT 0/1 +3 5300 . A G 100 PASS . GT 0/1 +3 5350 . A G 100 PASS . GT 0/1 +3 5400 . A G 100 PASS . GT 0/1 +3 5450 . A G 100 PASS . GT 0/1 +3 5500 . A G 100 PASS . GT 0/1 +3 5550 . A G 100 PASS . GT 0/1 +3 5600 . A G 100 PASS . GT 0/1 +3 5650 . A G 100 PASS . GT 0/1 +3 5700 . A G 100 PASS . GT 0/1 +3 5750 . A G 100 PASS . GT 0/1 +3 5800 . A G 100 PASS . GT 0/1 +3 5850 . A G 100 PASS . GT 0/1 +3 5900 . A G 100 PASS . GT 0/1 +3 5950 . A G 100 PASS . GT 0/1 +3 6000 . A G 100 PASS . GT 0/1 +3 6050 . A G 100 PASS . GT 0/1 +3 6100 . A G 100 PASS . GT 0/1 +3 6150 . A G 100 PASS . GT 0/1 +3 6200 . A G 100 PASS . GT 0/1 +3 6250 . A G 100 PASS . GT 0/1 +3 6300 . A G 100 PASS . GT 0/1 +3 6350 . A G 100 PASS . GT 0/1 +3 6400 . A G 100 PASS . GT 0/1 +3 6450 . A G 100 PASS . GT 0/1 +3 6500 . A G 100 PASS . GT 0/1 +3 6550 . A G 100 PASS . GT 0/1 +3 6600 . A G 100 PASS . GT 0/1 +3 6650 . A G 100 PASS . GT 0/1 +3 6700 . A G 100 PASS . GT 0/1 +3 6750 . A G 100 PASS . GT 0/1 +3 6800 . A G 100 PASS . GT 0/1 +3 6850 . A G 100 PASS . GT 0/1 +3 6900 . A G 100 PASS . GT 0/1 +3 6950 . A G 100 PASS . 
GT 0/1 +3 7000 . A G 100 PASS . GT 0/1 +3 7050 . A G 100 PASS . GT 0/1 +3 7100 . A G 100 PASS . GT 0/1 +3 7150 . A G 100 PASS . GT 0/1 +3 7200 . A G 100 PASS . GT 0/1 +3 7250 . A G 100 PASS . GT 0/1 +3 7300 . A G 100 PASS . GT 0/1 +3 7350 . A G 100 PASS . GT 0/1 +3 7400 . A G 100 PASS . GT 0/1 +3 7450 . A G 100 PASS . GT 0/1 +3 7500 . A G 100 PASS . GT 0/1 +3 7550 . A G 100 PASS . GT 0/1 +3 7600 . A G 100 PASS . GT 0/1 +3 7650 . A G 100 PASS . GT 0/1 +3 7700 . A G 100 PASS . GT 0/1 +3 7750 . A G 100 PASS . GT 0/1 +3 7800 . A G 100 PASS . GT 0/1 +3 7850 . A G 100 PASS . GT 0/1 +3 7900 . A G 100 PASS . GT 0/1 +3 7950 . A G 100 PASS . GT 0/1 +3 8000 . A G 100 PASS . GT 0/1 +3 8050 . A G 100 PASS . GT 0/1 +3 8100 . A G 100 PASS . GT 0/1 +3 8150 . A G 100 PASS . GT 0/1 +3 8200 . A G 100 PASS . GT 0/1 +3 8250 . A G 100 PASS . GT 0/1 +3 8300 . A G 100 PASS . GT 0/1 +3 8350 . A G 100 PASS . GT 0/1 +3 8400 . A G 100 PASS . GT 0/1 +3 8450 . A G 100 PASS . GT 0/1 +3 8500 . A G 100 PASS . GT 0/1 +3 8550 . A G 100 PASS . GT 0/1 +3 8600 . A G 100 PASS . GT 0/1 +3 8650 . A G 100 PASS . GT 0/1 +3 8700 . A G 100 PASS . GT 0/1 +3 8750 . A G 100 PASS . GT 0/1 +3 8800 . A G 100 PASS . GT 0/1 +3 8850 . A G 100 PASS . GT 0/1 +3 8900 . A G 100 PASS . GT 0/1 +3 8950 . A G 100 PASS . GT 0/1 +3 9000 . A G 100 PASS . GT 0/1 +3 9050 . A G 100 PASS . GT 0/1 +3 9100 . A G 100 PASS . GT 0/1 +3 9150 . A G 100 PASS . GT 0/1 +3 9200 . A G 100 PASS . GT 0/1 +3 9250 . A G 100 PASS . GT 0/1 +3 9300 . A G 100 PASS . GT 0/1 +3 9350 . A G 100 PASS . GT 0/1 +3 9400 . A G 100 PASS . GT 0/1 +3 9450 . A G 100 PASS . GT 0/1 +3 9500 . A G 100 PASS . GT 0/1 +3 9550 . A G 100 PASS . GT 0/1 +3 9600 . A G 100 PASS . GT 0/1 +3 9650 . A G 100 PASS . GT 0/1 +3 9700 . A G 100 PASS . GT 0/1 +3 9750 . A G 100 PASS . GT 0/1 +3 9800 . A G 100 PASS . GT 0/1 +3 9850 . A G 100 PASS . GT 0/1 +3 9900 . A G 100 PASS . GT 0/1 +3 9950 . A G 100 PASS . GT 0/1 +3 10000 . A G 100 PASS . GT 0/1 +3 10050 . A G 100 PASS . 
GT 0/1 diff --git a/test/test-bcf-sr.c b/test/test-bcf-sr.c index 1cd198fa0..94fb18dc1 100644 --- a/test/test-bcf-sr.c +++ b/test/test-bcf-sr.c @@ -62,8 +62,12 @@ void HTS_NORETURN usage(int exit_code) fprintf(stderr, " -O, --output-fmt fmt: vcf,bcf,summary\n"); fprintf(stderr, " -p, --pair logic: snps,indels,both,snps+ref,indels+ref,both+ref,exact,some,all\n"); fprintf(stderr, " -r, --regions comma-separated list of regions\n"); + fprintf(stderr, " -R, --regions-file BED/tab file of regions (exercises bcf_sr_set_regions is_file=1)\n"); fprintf(stderr, " -t, --targets comma-separated list of targets\n"); + fprintf(stderr, " -T, --targets-file file of targets (exercises bcf_sr_set_targets is_file=1)\n"); fprintf(stderr, " -u, --usefptr use hfile pointer interface on reader addition\n"); + fprintf(stderr, " --auto-targets-from-regions\n"); + fprintf(stderr, " set BCF_SR_AUTO_TARGETS_FROM_REGIONS (sniff-and-promote -R)\n"); fprintf(stderr, "\n"); exit(exit_code); } @@ -131,18 +135,23 @@ int main(int argc, char *argv[]) {"output-fmt",required_argument,NULL,'O'}, {"pair",required_argument,NULL,'p'}, {"regions",required_argument,NULL,'r'}, + {"regions-file",required_argument,NULL,'R'}, {"targets",required_argument,NULL,'t'}, + {"targets-file",required_argument,NULL,'T'}, {"no-index",no_argument,NULL,1000}, {"args",no_argument,NULL,1001}, + {"auto-targets-from-regions",no_argument,NULL,1002}, {"usefptr",no_argument,NULL,'u'}, {NULL,0,NULL,0} }; int c, pair = 0, use_index = 1, use_fofn = 1, usefptr = 0; + int auto_targets_from_regions = 0; enum htsExactFormat out_fmt = text_format; // for original pos + alleles const char *out_fn = NULL, *regions = NULL, *targets = NULL; + int regions_is_file = 0, targets_is_file = 0; htsFile **htsfp = NULL; - while ((c = getopt_long(argc, argv, "o:O:p:r:t:hu", loptions, NULL)) >= 0) + while ((c = getopt_long(argc, argv, "o:O:p:r:R:t:T:hu", loptions, NULL)) >= 0) { switch (c) { @@ -173,15 +182,26 @@ int main(int argc, char *argv[]) case 
'r': regions = optarg; break; + case 'R': + regions = optarg; + regions_is_file = 1; + break; case 't': targets = optarg; break; + case 'T': + targets = optarg; + targets_is_file = 1; + break; case 1000: use_index = 0; break; case 1001: use_fofn = 0; break; + case 1002: + auto_targets_from_regions = 1; + break; case 'u': usefptr = 1; //use htsfile interface instead of fname i/f break; @@ -211,16 +231,19 @@ int main(int argc, char *argv[]) } else { bcf_sr_set_opt(sr, BCF_SR_ALLOW_NO_IDX); } + if (auto_targets_from_regions) { + bcf_sr_set_opt(sr, BCF_SR_AUTO_TARGETS_FROM_REGIONS); + } if (regions) { - if (bcf_sr_set_regions(sr, regions, 0) != 0) + if (bcf_sr_set_regions(sr, regions, regions_is_file) != 0) error("Failed to set regions\n"); } if (targets) { - if (bcf_sr_set_targets(sr, targets, 0, 0) != 0) + if (bcf_sr_set_targets(sr, targets, targets_is_file, 0) != 0) error("Failed to set targets\n"); } diff --git a/test/test.pl b/test/test.pl index eaa65ea30..358febd4f 100755 --- a/test/test.pl +++ b/test/test.pl @@ -59,6 +59,7 @@ run_test('test_bcf_sr_no_index',$opts); run_test('test_bcf_sr_range', $opts); run_test('test_bcf_sr_hreader', $opts); +run_test('test_bcf_sr_regions_fastpath', $opts); run_test('test_command',$opts,cmd=>'test-bcf-translate -',out=>'test-bcf-translate.out'); run_test('test_convert_padded_header',$opts); run_test('test_rebgzip',$opts); @@ -1425,6 +1426,51 @@ sub test_bcf_sr_hreader { passed($opts, $test); } +sub test_bcf_sr_regions_fastpath { + # Regression test for samtools/bcftools#2557. With + # BCF_SR_AUTO_TARGETS_FROM_REGIONS set, bcf_sr_set_regions(is_file=1) sniffs + # the BED and routes dense single-base panels through the streaming- + # targets path. The fixture has 300 entries on 3 contigs spaced ~100bp + # apart, so it clears the sniffer's count and density gates. 
+ # + # Three runs over the same fixture, all required to match: + # - slow path: -R FILE (per-region tbx_itr_queryi) + # - fastpath: -R FILE + auto-targets opt (sniff + set_targets) + # - -T control: -T FILE (set_targets directly) + # Slow path and fastpath take genuinely different code paths inside + # bcf_sr_next_line(), so a regression in either is visible in their diff + # (diffing fastpath against -T alone would be tautological: both reach + # set_targets). The -T diff additionally pins regions/targets parity. + my ($opts, %args) = @_; + my $test = "test_bcf_sr_regions_fastpath"; + my $vcfdir = "$$opts{path}/bcf-sr"; + print "$test:\n"; + + # bgzip + tabix-index the fixture BED and VCF in the tmp dir. + foreach my $base ('regions-fastpath.bed', 'regions-fastpath.vcf') { + my $preset = ($base =~ /\.bed$/) ? 'bed' : 'vcf'; + my ($ret) = _cmd("cp $vcfdir/$base $$opts{tmp}/ && $$opts{bin}/bgzip -f $$opts{tmp}/$base && $$opts{bin}/tabix -p $preset $$opts{tmp}/$base.gz"); + if ($ret) { failed($opts, $test, "Failed to prepare fixture $base"); return; } + } + my $vcf = "$$opts{tmp}/regions-fastpath.vcf.gz"; + my $bed = "$$opts{tmp}/regions-fastpath.bed.gz"; + + my $out_slow = "$$opts{tmp}/regions-fastpath.slow.out.vcf"; + my $out_fast = "$$opts{tmp}/regions-fastpath.fast.out.vcf"; + my $out_T = "$$opts{tmp}/regions-fastpath.T.out.vcf"; + my ($r1) = _cmd("$$opts{path}/test-bcf-sr -O vcf -o $out_slow -R $bed --args $vcf"); + if ($r1) { failed($opts, $test, "test-bcf-sr -R (slow path) failed"); return; } + my ($r2) = _cmd("$$opts{path}/test-bcf-sr -O vcf -o $out_fast --auto-targets-from-regions -R $bed --args $vcf"); + if ($r2) { failed($opts, $test, "test-bcf-sr -R (fastpath) failed"); return; } + my ($r3) = _cmd("$$opts{path}/test-bcf-sr -O vcf -o $out_T -T $bed --args $vcf"); + if ($r3) { failed($opts, $test, "test-bcf-sr -T (control) failed"); return; } + my ($r4) = _cmd("diff $out_slow $out_fast"); + if ($r4) { failed($opts, $test, "fastpath output differs from slow 
path"); return; } + my ($r5) = _cmd("diff $out_slow $out_T"); + if ($r5) { failed($opts, $test, "slow-path output differs from -T control"); return; } + passed($opts, $test); +} + sub test_command { my ($opts, %args) = @_;