Skip to content

Commit

Permalink
Release 1.20
Browse files Browse the repository at this point in the history
  • Loading branch information
daviesrob committed Apr 15, 2024
2 parents bb75b76 + 67974ca commit 02ee548
Show file tree
Hide file tree
Showing 119 changed files with 6,825 additions and 923 deletions.
27 changes: 26 additions & 1 deletion LICENSE
Expand Up @@ -9,7 +9,7 @@ the INSTALL document), the use of this software is governed by the GPL license.

The MIT/Expat License

Copyright (C) 2012-2023 Genome Research Ltd.
Copyright (C) 2012-2024 Genome Research Ltd.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -772,3 +772,28 @@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-----------------------------------------------------------------------------

License for edlib.[ch]

The MIT License (MIT)

Copyright (c) 2014 Martin Šošić

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 changes: 13 additions & 8 deletions Makefile
Expand Up @@ -40,9 +40,10 @@ OBJS = main.o vcfindex.o tabix.o \
vcfcall.o mcall.o vcmp.o gvcf.o reheader.o convert.o vcfconvert.o tsv2vcf.o \
vcfcnv.o vcfhead.o HMM.o consensus.o ploidy.o bin.o hclust.o version.o \
regidx.o smpl_ilist.o csq.o vcfbuf.o \
mpileup.o bam2bcf.o bam2bcf_indel.o bam2bcf_iaux.o read_consensus.o bam_sample.o \
mpileup.o bam2bcf.o bam2bcf_indel.o bam2bcf_iaux.o bam2bcf_edlib.o \
read_consensus.o bam_sample.o \
vcfsort.o cols.o extsort.o dist.o abuf.o \
ccall.o em.o prob1.o kmin.o str_finder.o gff.o
ccall.o em.o prob1.o kmin.o str_finder.o gff.o edlib.o
PLUGIN_OBJS = vcfplugin.o

prefix = /usr/local
Expand Down Expand Up @@ -104,7 +105,7 @@ endif

include config.mk

PACKAGE_VERSION = 1.19
PACKAGE_VERSION = 1.20

# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
# description of the working tree: either a release tag with the same value
Expand Down Expand Up @@ -142,7 +143,9 @@ print-version:
ifdef USE_GPL
main.o : EXTRA_CPPFLAGS += -DUSE_GPL
OBJS += polysomy.o peakfit.o
GSL_LIBS ?= -lgsl -lcblas
ifndef GSL_LIBS
GSL_LIBS += -lgsl -lcblas
endif
endif

print-%:
Expand Down Expand Up @@ -232,6 +235,7 @@ vcfbuf_h = vcfbuf.h $(htslib_vcf_h)
abuf_h = abuf.h $(htslib_vcf_h)
dbuf_h = dbuf.h $(htslib_vcf_h)
bam2bcf_h = bam2bcf.h $(htslib_hts_h) $(htslib_vcf_h)
edlib.h = edlib.h
bam_sample_h = bam_sample.h $(htslib_sam_h)
cigar_state_h = cigar_state.h $(htslib_hts_h) $(htslib_sam_h)
read_consensus_h = read_consensus.h $(htslib_hts_h) $(htslib_sam_h)
Expand All @@ -242,8 +246,8 @@ main.o: main.c $(htslib_hts_h) config.h version.h $(bcftools_h)
vcfannotate.o: vcfannotate.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h) $(convert_h) $(smpl_ilist_h) regidx.h $(htslib_khash_h) $(dbuf_h)
vcfplugin.o: vcfplugin.c config.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h)
vcfcall.o: vcfcall.c $(htslib_vcf_h) $(htslib_kfunc_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(bcftools_h) $(call_h) $(prob1_h) $(ploidy_h) $(gvcf_h) regidx.h $(vcfbuf_h)
vcfconcat.o: vcfconcat.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_thread_pool_h) $(bcftools_h)
vcfconvert.o: vcfconvert.c $(htslib_faidx_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kseq_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h)
vcfconcat.o: vcfconcat.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) $(bcftools_h)
vcfconvert.o: vcfconvert.c $(htslib_faidx_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kseq_h) $(htslib_hts_endian_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h)
vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) rbuf.h regidx.h
vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kbitset_h) $(htslib_hts_os_h) $(htslib_bgzf_h) $(bcftools_h) extsort.h filter.h
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(bcftools_h)
Expand All @@ -261,10 +265,10 @@ vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfu
reheader.o: reheader.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_kseq_h) $(htslib_thread_pool_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) $(khash_str2str_h)
tabix.o: tabix.c $(htslib_bgzf_h) $(htslib_tbx_h)
ccall.o: ccall.c $(htslib_kfunc_h) $(call_h) kmin.h $(prob1_h)
convert.o: convert.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(bcftools_h) $(variantkey_h) $(convert_h) $(filter_h)
convert.o: convert.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(htslib_hts_endian_h) $(bcftools_h) $(variantkey_h) $(convert_h) $(filter_h)
tsv2vcf.o: tsv2vcf.c $(tsv2vcf_h)
em.o: em.c $(htslib_vcf_h) kmin.h $(call_h)
filter.o: filter.c $(htslib_khash_str2int_h) $(htslib_hts_defs_h) $(htslib_vcfutils_h) $(htslib_kfunc_h) config.h $(filter_h) $(bcftools_h)
filter.o: filter.c $(htslib_khash_str2int_h) $(htslib_hts_defs_h) $(htslib_vcfutils_h) $(htslib_kfunc_h) $(htslib_hts_endian_h) config.h $(filter_h) $(bcftools_h)
$(CC) $(CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CPPFLAGS) $(PERL_CFLAGS) -c -o $@ $<
gvcf.o: gvcf.c $(gvcf_h) $(bcftools_h)
kmin.o: kmin.c kmin.h
Expand All @@ -283,6 +287,7 @@ mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(hts
bam2bcf.o: bam2bcf.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(bam2bcf_h) mw.h
bam2bcf_indel.o: bam2bcf_indel.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h) $(str_finder_h)
bam2bcf_iaux.o: bam2bcf_iaux.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bcftools_h) $(bam2bcf_h) $(htslib_ksort_h) $(read_consensus_h) $(cigar_state_h)
bam2bcf_edlib.o: bam2bcf_edlib.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bcftools_h) $(bam2bcf_h) $(htslib_ksort_h) $(read_consensus_h) $(cigar_state_h) $(edlib.h)
read_consensus.o: read_consensus.c $(read_consensus_h) $(cigar_state_h) $(bcftools_h) kheap.h
bam_sample.o: bam_sample.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(khash_str2str_h) $(bam_sample_h) $(bcftools_h)
version.o: version.h version.c
Expand Down
110 changes: 103 additions & 7 deletions NEWS
@@ -1,3 +1,99 @@
## Release 1.20 (15th April 2024)


Changes affecting the whole of bcftools, or multiple commands:

* Add short option -W for --write-index. The option now accepts an optional parameter
which allows choosing between the TBI and CSI index formats.


Changes affecting specific commands:

* bcftools consensus

- Add new --regions-overlap option which allows overlapping deletions that start outside
the fasta file target region to be taken into account.

* bcftools isec

- Add new option `-l, --file-list` to read the list of file names from a file

* bcftools merge

- Add new option `--force-single` to support single-file edge case (#2100)

* bcftools mpileup

- Add new option --indels-cns for an alternative indel calling model, which should increase
the speed on long read data (thanks to using edlib) and the precision (thanks to a number
of heuristics).

* bcftools norm

- Change the order of atomization and multiallelic splitting (when both -a,-m are given)
from "atomize first, then split" to "split first, then atomize". This usually results
in a simpler VCF representation. The previous behaviour can be achieved by explicitly
streaming the output of the --atomize command into the --multiallelics splitting command.

- Fix Type=String multiallelic splitting for Number=A,R,G tags with an incorrect number
of values.

- Merging into multiallelic sites with `bcftools norm -m +indels` did not work. This is
now fixed and the merging is now more strict about variant types, for example complex
events, such as AC>TGA, are not considered as indels anymore (#2084)

* bcftools reheader

- Allow reading the input file from a stream with --fai (#2088)

* bcftools +setGT

- Support for custom genotypes based on the allele with higher depth, such
as `--new-gt c:0/X` (#2065)

* bcftools +split-vep

- When only one of the tags is present, automatically choose INFO/BCSQ (the default
tag name produced by `bcftools csq`) or INFO/CSQ (produced by VEP). When both
tags are present, use the default INFO/CSQ.

- Transcript selection by MANE, PICK, and user-defined transcripts, for example

--select CANONICAL=YES
--select MANE_SELECT!=""
--select PolyPhen~probably_damaging

- Select all matching transcripts via --select, not just one

- Change automatic type parsing of VEP fields cDNA_position, CDS_position, and Protein_position
from Integer to String, as they can be of the form "8586-8599/9231". The type Integer can
still be enforced with `-c cDNA_position:int,CDS_position:int,Protein_position:int`.

- Recognize `-c field:str`, not just `-c field:string`, as advertised in the usage page

- Fix a bug which made filtering expressions containing missing values crash (#2098)

* bcftools stats

- When GT is missing but AD is present, the program determines the alternate allele from AD.
However, if the AD tag has an incorrect number of values, the program would exit with an error
printing "Requested allele outside valid range". This is now fixed by taking into account
the actual number of ALT alleles.

* bcftools +tag2tag

- Support for conversion from tags using localized alleles (e.g. LPL, LAD) to the family of
standard tags (PL, AD)

* bcftools +trio-dnm2

- Extend --strictly-novel to exclude cases where the non-Mendelian allele
is the reference allele. The change is motivated by the observation that
this class of variants is enriched for errors (especially for indels),
and better corresponds with the option name.



## Release 1.19 (12th December 2023)


Expand Down Expand Up @@ -338,7 +434,7 @@ Changes affecting specific commands:

* bcftools norm

- New --multi-overlaps option allows to set overlapping alleles either to the
- New --multi-overlaps option allows setting overlapping alleles either to the
ref allele (the current default) or to a missing allele (#1764 and #1802)

- Fixed a bug in `-m -` which does not split missing FORMAT values correctly and
Expand Down Expand Up @@ -509,7 +605,7 @@ Changes affecting specific commands:
- In addition to `--rename-annots`, which requires a file with name mappings,
it is now possible to do the same on the command line `-c NEW_TAG:=OLD_TAG`

- Add new option --min-overlap which allows to specify the minimum required
- Add new option --min-overlap to specify the minimum required
overlap of intersecting regions

- Allow transferring ALT from VCF with or without replacement using
Expand Down Expand Up @@ -569,7 +665,7 @@ Changes affecting specific commands:
* bcftools query

- Make the `--samples` and `--samples-file` options work also in the `--list-samples`
mode. Add a new `--force-samples` option which allows to proceed even when some of
mode. Add a new `--force-samples` option which enables proceeding even when some of
the requested samples are not present in the VCF (#1631)

* bcftools +setGT
Expand Down Expand Up @@ -682,7 +778,7 @@ Changes affecting specific commands:

* bcftools mpileup:

- new --indel-size option which allows to increase the maximum considered
- new --indel-size option which allows increasing the maximum
indel size considered; large deletions in long read data are otherwise
lost.

Expand Down Expand Up @@ -903,7 +999,7 @@ Changes affecting specific commands:

- New `--rename-annots` option to help fix broken VCFs (#1335)

- New -C option allows to read a long list of options from a file to
- New -C option allows a long list of options to be read from a file to
prevent very long command lines.

- New `append-missing` logic allows annotations to be added for each ALT
Expand Down Expand Up @@ -1114,7 +1210,7 @@ Changes affecting specific commands:

- Preserve the case of the genome reference. (#1150)

- Add new `-a, --absent` option which allows to set positions with no
- Add new `-a, --absent` option which allows setting positions with no
supporting evidence to "N" (or any other character). (#848; #940)

* bcftools convert:
Expand Down Expand Up @@ -1162,7 +1258,7 @@ Changes affecting specific commands:
- Local alleles merging that produce LAA and LPL when requested, a draft
implementation of https://github.com/samtools/hts-specs/pull/434 (#1138)

- New `--no-index` which allows to merge unindexed files. Requires the input
- New `--no-index` which allows unindexed files to be merged. Requires the input
files to have chromosomes in the same order and consistent with the order
of sequences in the header. (PR #1253; samtools/htslib#1089)

Expand Down
10 changes: 9 additions & 1 deletion abuf.c
@@ -1,6 +1,6 @@
/* The MIT License
Copyright (c) 2021-2023 Genome Research Ltd.
Copyright (c) 2021-2024 Genome Research Ltd.
Author: Petr Danecek <pd3@sanger.ac.uk>
Expand Down Expand Up @@ -418,6 +418,14 @@ static void _split_table_set_history(abuf_t *buf)
{
int i,j,ret;
bcf1_t *rec = buf->split.rec;

// Don't update if the tag already exists. This is to prevent -a from overwriting -m
int m = 0;
char *tmp = NULL;
ret = bcf_get_info_string(buf->hdr,rec,buf->split.info_tag,&tmp,&m);
free(tmp);
if ( ret>0 ) return;

buf->tmps.l = 0;
ksprintf(&buf->tmps,"%s|%"PRIhts_pos"|%s|",bcf_seqname(buf->hdr,rec),rec->pos+1,rec->d.allele[0]);
for (i=1; i<rec->n_allele; i++)
Expand Down

0 comments on commit 02ee548

Please sign in to comment.