Skip to content

Commit

Permalink
Release 1.19
Browse files Browse the repository at this point in the history
  • Loading branch information
daviesrob committed Dec 12, 2023
2 parents 6c2c1e9 + 37cf779 commit bb75b76
Show file tree
Hide file tree
Showing 87 changed files with 2,035 additions and 743 deletions.
29 changes: 16 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ endif

include config.mk

PACKAGE_VERSION = 1.18
PACKAGE_VERSION = 1.19

# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
# description of the working tree: either a release tag with the same value
Expand Down Expand Up @@ -233,28 +233,31 @@ abuf_h = abuf.h $(htslib_vcf_h)
dbuf_h = dbuf.h $(htslib_vcf_h)
bam2bcf_h = bam2bcf.h $(htslib_hts_h) $(htslib_vcf_h)
bam_sample_h = bam_sample.h $(htslib_sam_h)
cigar_state_h = cigar_state.h $(htslib_hts_h) $(htslib_sam_h)
read_consensus_h = read_consensus.h $(htslib_hts_h) $(htslib_sam_h)
str_finder_h = str_finder.h utlist.h

str_finder.o: str_finder.h utlist.h
# $(str_finder_h) already expands to "str_finder.h utlist.h", so utlist.h is not repeated here.
str_finder.o: str_finder.c $(str_finder_h)
main.o: main.c $(htslib_hts_h) config.h version.h $(bcftools_h)
vcfannotate.o: vcfannotate.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h) $(convert_h) $(smpl_ilist_h) regidx.h $(htslib_khash_h) $(dbuf_h)
vcfplugin.o: vcfplugin.c config.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h)
vcfcall.o: vcfcall.c $(htslib_vcf_h) $(htslib_kfunc_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(bcftools_h) $(call_h) $(prob1_h) $(ploidy_h) $(gvcf_h) regidx.h $(vcfbuf_h)
vcfconcat.o: vcfconcat.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_thread_pool_h) $(bcftools_h)
vcfconvert.o: vcfconvert.c $(htslib_faidx_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kseq_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h)
vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) rbuf.h regidx.h
vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kbitset_h) $(htslib_hts_os_h) $(bcftools_h) extsort.h
vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kbitset_h) $(htslib_hts_os_h) $(htslib_bgzf_h) $(bcftools_h) extsort.h filter.h
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(bcftools_h)
vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h) $(filter_h)
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) regidx.h $(bcftools_h) vcmp.h $(htslib_khash_h)
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h abuf.h gff.h
# Each header dependency is listed once; $(htslib_kbitset_h) previously appeared twice.
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(htslib_kbitset_h) $(htslib_hts_endian_h) $(bcftools_h) regidx.h vcmp.h $(htslib_khash_h)
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h abuf.h gff.h regidx.h
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h) $(smpl_ilist_h)
vcfroh.o: vcfroh.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(bcftools_h) HMM.h $(smpl_ilist_h) $(filter_h)
vcfcnv.o: vcfcnv.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(bcftools_h) HMM.h rbuf.h
vcfhead.o: vcfhead.c $(htslib_kstring_h) $(htslib_vcf_h) $(bcftools_h)
vcfsom.o: vcfsom.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h)
vcfsort.o: vcfsort.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_hts_os_h) kheap.h $(bcftools_h)
vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) bin.h dist.h
vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(htslib_khash_str2int_h)
vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(htslib_khash_str2int_h) $(htslib_kbitset_h)
reheader.o: reheader.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_kseq_h) $(htslib_thread_pool_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) $(khash_str2str_h)
tabix.o: tabix.c $(htslib_bgzf_h) $(htslib_tbx_h)
ccall.o: ccall.c $(htslib_kfunc_h) $(call_h) kmin.h $(prob1_h)
Expand All @@ -275,12 +278,12 @@ bin.o: bin.c $(bcftools_h) bin.h
dist.o: dist.c dist.h
cols.o: cols.c cols.h
regidx.o: regidx.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) regidx.h
consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) regidx.h $(bcftools_h) rbuf.h $(filter_h)
consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) regidx.h $(bcftools_h) rbuf.h $(filter_h) $(smpl_ilist_h)
mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(htslib_hts_os_h) regidx.h $(bcftools_h) $(bam2bcf_h) $(bam_sample_h) $(gvcf_h)
bam2bcf.o: bam2bcf.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(bam2bcf_h) mw.h
bam2bcf_indel.o: bam2bcf_indel.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h) str_finder.h
bam2bcf_iaux.o: bam2bcf_iaux.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h) str_finder.h read_consensus.h cigar_state.h
read_consensus.o: read_consensus.c read_consensus.h cigar_state.h $(htslib_hts_h) $(htslib_sam_h)
bam2bcf_indel.o: bam2bcf_indel.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h) $(str_finder_h)
bam2bcf_iaux.o: bam2bcf_iaux.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bcftools_h) $(bam2bcf_h) $(htslib_ksort_h) $(read_consensus_h) $(cigar_state_h)
read_consensus.o: read_consensus.c $(read_consensus_h) $(cigar_state_h) $(bcftools_h) kheap.h
bam_sample.o: bam_sample.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(khash_str2str_h) $(bam_sample_h) $(bcftools_h)
version.o: version.h version.c
hclust.o: hclust.c $(htslib_hts_h) $(htslib_kstring_h) $(bcftools_h) hclust.h
Expand All @@ -289,8 +292,8 @@ vcfbuf.o: vcfbuf.c $(htslib_vcf_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcf
abuf.o: abuf.c $(htslib_vcf_h) $(bcftools_h) rbuf.h abuf.h
extsort.o: extsort.c $(bcftools_h) extsort.h kheap.h
smpl_ilist.o: smpl_ilist.c $(bcftools_h) $(smpl_ilist_h)
gff.o: gff.c gff.h regidx.h
csq.o: csq.c $(htslib_hts_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_h) $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) regidx.h kheap.h $(smpl_ilist_h) rbuf.h
gff.o: gff.c $(htslib_hts_h) $(htslib_khash_h) $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(bcftools_h) gff.h regidx.h
csq.o: csq.c $(htslib_hts_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_h) $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_faidx_h) $(htslib_bgzf_h) $(bcftools_h) $(filter_h) regidx.h kheap.h $(smpl_ilist_h) rbuf.h gff.h

# test programs

Expand All @@ -300,7 +303,7 @@ csq.o: csq.c $(htslib_hts_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(hts
#
# If using MSYS, avoid poor shell expansion via:
# MSYS2_ARG_CONV_EXCL="*" make check
check test-no-plugins: $(PROGRAMS) $(TEST_PROGRAMS) $(BGZIP) $(TABIX)
check-no-plugins test-no-plugins: $(PROGRAMS) $(TEST_PROGRAMS) $(BGZIP) $(TABIX)
./test/test-rbuf
./test/test-regidx
REF_PATH=: ./test/test.pl --exec bgzip=$(BGZIP) --exec tabix=$(TABIX) --htsdir=$(HTSDIR) $${TEST_OPTS:-}
Expand Down
124 changes: 123 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,5 +1,127 @@
## Release 1.18 (25th July 2023)
## Release 1.19 (12th December 2023)


Changes affecting the whole of bcftools, or multiple commands:

* Filtering expressions can be given a file with a list of strings to match; this
was previously possible only for the ID column. For example

ID=@file .. selects lines with ID present in the file
INFO/TAG=@file.txt .. selects lines where TAG has a string value listed in the file
INFO/TAG!=@file.txt .. TAG must not have a string value listed in the file

Allow querying the REF and ALT columns directly, for example

-e 'REF="N"'


Changes affecting specific commands:

* bcftools annotate

- Fix `bcftools annotate --mark-sites`, VCF sites overlapping regions in a BED file
were not annotated (#1989)

- Add flexibility to FILTER column transfers and allow transfers within the same file,
across files, and in combination. For examples see
http://samtools.github.io/bcftools/howtos/annotate.html#transfer_filter_to_info

* bcftools call

- Output MIN_DP rather than MinDP in gVCF mode

- New `-*, --keep-unseen-allele` option to output the unobserved allele <*>,
intended for gVCF.

* bcftools head

- New `-s, --samples` option to include the #CHROM header line with samples.

* bcftools gtcheck

- Add output options `-o, --output` and `-O, --output-type`

- Add filtering options `-i, --include` and `-e, --exclude`

- Rename the short option `-e, --error-probability` from lower case to upper
case `-E, --error-probability`

- Changes to the output format, replace the DC section with DCv2:

- adds a new column for the number of matching genotypes

- The --error-probability is newly interpreted as the probability of erroneous
allele rather than genotype. In other words, the calculation of the discordance
score now considers the probability of genotyping error to be different
for HOM and HET genotypes, i.e. P(0/1|dsg=0) > P(1/1|dsg=0).

- fixes in HWE score calculation plus output average HWE score rather
than absolute HWE score

- better description of fields

* bcftools merge

- Add `-m` modifiers to suppress the output of the unseen allele <*> or <NON_REF>
at variant sites (e.g. `-m both,*`) or all sites (e.g. `-m both,**`)

* bcftools mpileup

- Output MIN_DP rather than MinDP in gVCF mode

* bcftools norm

- Add the number of joined lines to the summary output, for example

Lines total/split/joined/realigned/skipped: 6/0/3/0/0

- Allow combining -m and -a with --old-rec-tag (#2020)

- Symbolic <DEL> alleles caused norm to expand REF to the full length of the deletion.
This was not intended and problematic for long deletions, the REF allele should list
one base only (#2029)

* bcftools query

- Add new `-N, --disable-automatic-newline` option to restore the pre-1.18 query formatting
behavior, where a missing newline was not added automatically

- Make the automatic addition of the newline character more predictable and,
when missing, always put it at the end of the expression. In version 1.18 it could
be added at the end of the expression (for per-site expressions) or inside the square
brackets (for per-sample expressions). The new behavior is:

- if the formatting expression contains a newline character, do nothing
- if there is no newline character and -N, --disable-automatic-newline is given, do nothing
- if there is no newline character and -N is not given, insert newline at the end of the expression

See #1969 for details

- Add new `-F, --print-filtered` option to output a default string for samples that would otherwise
be filtered by `-i/-e` expressions.

- Include sample name in the output header with `-H` whenever it makes sense (#1992)

* bcftools +split-vep

- Fix on the fly filtering involving numeric subfields, e.g. `-i 'MAX_AF<0.001'` (#2039)

- Interpret default column type names (--columns-types) as entire strings, rather than
substrings to avoid unexpected spurious matches (i.e. internally add ^ and $ to all
field names)

* bcftools +trio-dnm2

- Do not flag paternal genotyping errors as de novo mutations. Specifically, when father's
chrX genotype is 0/1 and mother's 0/0, 0/1 in the child will not be marked as DNM.

* bcftools view

- Add new `-A, --trim-unseen-allele` option to remove the unseen allele <*> or <NON_REF>
at variant sites (`-A`) or all sites (`-AA`)


## Release 1.18 (25th July 2023)

Changes affecting the whole of bcftools, or multiple commands:

Expand Down
10 changes: 5 additions & 5 deletions abuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -411,12 +411,12 @@ static void _split_table_set_info(abuf_t *buf, bcf_info_t *info, merge_rule_t mo
buf->tmp2 = dst.s;
ret = bcf_update_info(buf->out_hdr, out, tag, buf->tmp2, dst.l, type);
}
if ( ret!=0 ) error("An error occurred while updating INFO/%s\n",tag);
if ( ret!=0 ) error("An error occurred while updating INFO/%s (errcode=%d)\n",tag,ret);
}
}
static void _split_table_set_history(abuf_t *buf)
{
int i,j;
int i,j,ret;
bcf1_t *rec = buf->split.rec;
buf->tmps.l = 0;
ksprintf(&buf->tmps,"%s|%"PRIhts_pos"|%s|",bcf_seqname(buf->hdr,rec),rec->pos+1,rec->d.allele[0]);
Expand All @@ -441,8 +441,8 @@ static void _split_table_set_history(abuf_t *buf)
kputc(',',&buf->tmps);
}
buf->tmps.s[--buf->tmps.l] = 0;
if ( (bcf_update_info_string(buf->out_hdr, out, buf->split.info_tag, buf->tmps.s))!=0 )
error("An error occurred while updating INFO/%s\n",buf->split.info_tag);
if ( (ret=bcf_update_info_string(buf->out_hdr, out, buf->split.info_tag, buf->tmps.s))!=0 )
error("An error occurred while updating INFO/%s (errcode=%d)\n",buf->split.info_tag,ret);
}
}
static void _split_table_set_gt(abuf_t *buf)
Expand Down Expand Up @@ -668,7 +668,7 @@ static void _split_table_set_format(abuf_t *buf, bcf_fmt_t *fmt, merge_rule_t mo
#undef BRANCH
ret = bcf_update_format(buf->out_hdr, out, tag, buf->tmp2, 3*(1+star_allele)*nsmpl, type);
}
if ( ret!=0 ) error("An error occurred while updating FORMAT/%s\n",tag);
if ( ret!=0 ) error("An error occurred while updating FORMAT/%s (errcode=%d)\n",tag,ret);
}
}
static inline int _is_acgtn(char *seq)
Expand Down
12 changes: 12 additions & 0 deletions bcftools.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,4 +141,16 @@ static inline int bcf_double_test(double d, uint64_t value)
#define bcf_double_is_missing(x) bcf_double_test((x),bcf_double_missing)
#define bcf_double_is_missing_or_vector_end(x) (bcf_double_test((x),bcf_double_missing) || bcf_double_test((x),bcf_double_vector_end))

/*
 *  get_unseen_allele() - find the symbolic "unseen" allele among the ALT alleles
 *  @line:  the record to inspect
 *
 *  Scans alleles from index 1 up to line->n_allele and returns the index of
 *  the first one spelled exactly "<*>", "<NON_REF>" or "<X>". Returns 0 when
 *  no such allele is present (index 0 is never examined).
 *
 *  NOTE(review): reads line->d.allele directly, so presumably the caller must
 *  have unpacked the allele strings beforehand - confirm against callers.
 */
static inline int get_unseen_allele(bcf1_t *line)
{
    int ial;
    for (ial=1; ial<line->n_allele; ial++)
    {
        const char *allele = line->d.allele[ial];
        if ( strcmp(allele,"<*>")==0 || strcmp(allele,"<NON_REF>")==0 || strcmp(allele,"<X>")==0 )
            return ial;
    }
    return 0;
}

#endif
5 changes: 2 additions & 3 deletions call.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ THE SOFTWARE. */
#define CALL_VARONLY (1<<1)
#define CALL_CONSTR_TRIO (1<<2)
#define CALL_CONSTR_ALLELES (1<<3)
//
#define CALL_KEEP_UNSEEN (1<<4)
#define CALL_FMT_PV4 (1<<5)
#define CALL_FMT_GQ (1<<6)
#define CALL_FMT_GP (1<<7)
Expand Down Expand Up @@ -125,8 +125,7 @@ call_t;
void error(const char *format, ...);

/*
* call() - return -1 value on critical error; -2 to skip the site; or the number of non-reference
* alleles on success.
* call() - return -1 value on critical error; -2 to skip the site; or the number of alleles on success
*/
int mcall(call_t *call, bcf1_t *rec); // multiallelic and rare-variant calling model
int ccall(call_t *call, bcf1_t *rec); // the default consensus calling model
Expand Down

0 comments on commit bb75b76

Please sign in to comment.