Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
bcftools merge overhaul from @pd3 fork
Major overhaul of merge to accommodate merging of gvcf files produced by the new bcftools mpileup. Update also closes a number of long-standing issues. Closes #412, #408, #361, #296 and possibly resolves #401 [NEWS] Major overhaul of `bcftools merge` to allow merging of gvcf files produced by `bcftools mpileup`
- Loading branch information
Showing
14 changed files
with
925 additions
and
343 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
##fileformat=VCFv4.1 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##FILTER=<ID=LowGQX,Description="Locus GQX is less than 30 or not present"> | ||
##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.3"> | ||
##FILTER=<ID=nc,Description="No-call"> | ||
##contig=<ID=chrY,length=59373566,assembly=B37,md5=1e86411d73e6f00a10590f976be01623,species="Homo sapiens"> | ||
##contig=<ID=chrM,length=16569,assembly=B37,md5=c68f52674c9fb33aef52dcf399755519,species="Homo sapiens"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality"> | ||
##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=EHQ,Number=2,Type=Integer,Description="Haplotype Quality, Equal Allele Fraction Assumption"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping"> | ||
##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping"> | ||
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> | ||
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> | ||
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data"> | ||
##INFO=<ID=MATEID,Number=1,Type=String,Description="ID of mate breakend"> | ||
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele"> | ||
##INFO=<ID=CGA_XR,Number=A,Type=String,Description="Per-ALT external database reference (dbSNP, COSMIC, etc)"> | ||
##INFO=<ID=CGA_BF,Number=1,Type=Float,Description="Frequency in baseline"> | ||
##INFO=<ID=CGA_FI,Number=A,Type=String,Description="Functional impact annotation"> | ||
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record"> | ||
##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant site block. All sites in a block are constrained to be non-variant, have the same filter value, and have all sample values in range [x,y], y <= max(x+3,(x*1.3)). All printed site block sample values are the minimum observed in the region spanned by the block"> | ||
##contig=<ID=chr1> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT G06 D05 H09 | ||
chr1 10106 . C . 0 LowGQX BLOCKAVG_min30p3a;AN=2 GT:GQX:DP:DPF ./.:.:.:. 0/0:12:5:0 ./.:.:.:. | ||
chr1 10107 . C . 0 LowGQX;HighDPFRatio BLOCKAVG_min30p3a;AN=4 GT:GQX:DP:DPF .:.:0:1 0/0:12:5:0 0/0:5:2:0 | ||
chr1 10108 . N . 0 LowGQX;HighDPFRatio END=10110;BLOCKAVG_min30p3a;AN=2 GT:GQX:DP:DPF .:.:0:1 ./.:.:.:. 0/0:5:2:0 | ||
chr1 10111 . N . 0 LowGQX END=10120;BLOCKAVG_min30p3a;AN=2 GT:GQX:DP:DPF ./.:.:.:. ./.:.:.:. 0/0:5:2:0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
##fileformat=VCFv4.1 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##FILTER=<ID=LowGQX,Description="Locus GQX is less than 30 or not present"> | ||
##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.3"> | ||
##FILTER=<ID=nc,Description="No-call"> | ||
##contig=<ID=chrY,length=59373566,assembly=B37,md5=1e86411d73e6f00a10590f976be01623,species="Homo sapiens"> | ||
##contig=<ID=chrM,length=16569,assembly=B37,md5=c68f52674c9fb33aef52dcf399755519,species="Homo sapiens"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality"> | ||
##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=EHQ,Number=2,Type=Integer,Description="Haplotype Quality, Equal Allele Fraction Assumption"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping"> | ||
##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping"> | ||
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> | ||
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> | ||
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data"> | ||
##INFO=<ID=MATEID,Number=1,Type=String,Description="ID of mate breakend"> | ||
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele"> | ||
##INFO=<ID=CGA_XR,Number=A,Type=String,Description="Per-ALT external database reference (dbSNP, COSMIC, etc)"> | ||
##INFO=<ID=CGA_BF,Number=1,Type=Float,Description="Frequency in baseline"> | ||
##INFO=<ID=CGA_FI,Number=A,Type=String,Description="Functional impact annotation"> | ||
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record"> | ||
##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant site block. All sites in a block are constrained to be non-variant, have the same filter value, and have all sample values in range [x,y], y <= max(x+3,(x*1.3)). All printed site block sample values are the minimum observed in the region spanned by the block"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT G06 | ||
chr1 10107 . C . 0.00 LowGQX;HighDPFRatio END=10110;BLOCKAVG_min30p3a GT:GQX:DP:DPF .:.:0:1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
##fileformat=VCFv4.1 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##FILTER=<ID=LowGQX,Description="Locus GQX is less than 30 or not present"> | ||
##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.3"> | ||
##FILTER=<ID=nc,Description="No-call"> | ||
##contig=<ID=chrY,length=59373566,assembly=B37,md5=1e86411d73e6f00a10590f976be01623,species="Homo sapiens"> | ||
##contig=<ID=chrM,length=16569,assembly=B37,md5=c68f52674c9fb33aef52dcf399755519,species="Homo sapiens"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality"> | ||
##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=EHQ,Number=2,Type=Integer,Description="Haplotype Quality, Equal Allele Fraction Assumption"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping"> | ||
##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping"> | ||
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> | ||
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> | ||
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data"> | ||
##INFO=<ID=MATEID,Number=1,Type=String,Description="ID of mate breakend"> | ||
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele"> | ||
##INFO=<ID=CGA_XR,Number=A,Type=String,Description="Per-ALT external database reference (dbSNP, COSMIC, etc)"> | ||
##INFO=<ID=CGA_BF,Number=1,Type=Float,Description="Frequency in baseline"> | ||
##INFO=<ID=CGA_FI,Number=A,Type=String,Description="Functional impact annotation"> | ||
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record"> | ||
##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant site block. All sites in a block are constrained to be non-variant, have the same filter value, and have all sample values in range [x,y], y <= max(x+3,(x*1.3)). All printed site block sample values are the minimum observed in the region spanned by the block"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT D05 | ||
chr1 10106 . C . 0.00 LowGQX END=10107;BLOCKAVG_min30p3a GT:GQX:DP:DPF 0/0:12:5:0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
##fileformat=VCFv4.1 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##FILTER=<ID=LowGQX,Description="Locus GQX is less than 30 or not present"> | ||
##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.3"> | ||
##FILTER=<ID=nc,Description="No-call"> | ||
##contig=<ID=chrY,length=59373566,assembly=B37,md5=1e86411d73e6f00a10590f976be01623,species="Homo sapiens"> | ||
##contig=<ID=chrM,length=16569,assembly=B37,md5=c68f52674c9fb33aef52dcf399755519,species="Homo sapiens"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality"> | ||
##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=EHQ,Number=2,Type=Integer,Description="Haplotype Quality, Equal Allele Fraction Assumption"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping"> | ||
##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping"> | ||
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> | ||
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> | ||
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data"> | ||
##INFO=<ID=MATEID,Number=1,Type=String,Description="ID of mate breakend"> | ||
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele"> | ||
##INFO=<ID=CGA_XR,Number=A,Type=String,Description="Per-ALT external database reference (dbSNP, COSMIC, etc)"> | ||
##INFO=<ID=CGA_BF,Number=1,Type=Float,Description="Frequency in baseline"> | ||
##INFO=<ID=CGA_FI,Number=A,Type=String,Description="Functional impact annotation"> | ||
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record"> | ||
##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant site block. All sites in a block are constrained to be non-variant, have the same filter value, and have all sample values in range [x,y], y <= max(x+3,(x*1.3)). All printed site block sample values are the minimum observed in the region spanned by the block"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT H09 | ||
chr1 10107 . C . 0.00 LowGQX END=10120;BLOCKAVG_min30p3a GT:GQX:DP:DPF 0/0:5:2:0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
##fileformat=VCFv4.2 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta | ||
##contig=<ID=3,length=243199373> | ||
##contig=<ID=2,length=243199373> | ||
##contig=<ID=1,length=243199373> | ||
##contig=<ID=4,length=243199373> | ||
##contig=<ID=8,length=243199373> | ||
##contig=<ID=5,length=243199373> | ||
##contig=<ID=6,length=243199373> | ||
##contig=<ID=7,length=243199373> | ||
##INFO=<ID=QS,Number=R,Type=Float,Description="Auxiliary tag used for calling"> | ||
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality bases"> | ||
##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of high-quality non-reference bases"> | ||
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> | ||
##INFO=<ID=MinDP,Number=1,Type=Integer,Description="Minimum per-sample depth in this gVCF block"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT AAA | ||
2 21444416 . G <*> . . END=21444429;MinDP=5;QS=1,0 PL:DP 0,15,125:5 | ||
2 21444430 . TCAA T,TAA 0 . QS=0.603659,0.304878,0.0914634 PL:DP:DV 37,0,79,35,73,113:5:2 | ||
2 21444431 . C <*> . . MinDP=4;QS=1,0 PL:DP 0,12,110:4 | ||
2 21444431 . CA C 0 . QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 | ||
2 21444433 . C <*> 0 . END=21444444;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 | ||
3 1 . C <*> 0 . END=10;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 | ||
1 1619670 . C <*> 0 . END=1619877;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 | ||
4 20000975 . C <*> 0 . END=20001070;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 | ||
4 20001071 . T G,<*> 0 . . PL:DP:DV 0,4,10:4:1 | ||
5 110285 . TAACCCC T . . . PL 89,6,0 | ||
5 1110285 . T TAACCCC . . . PL 89,6,0 | ||
6 600 . T A . . END=666 PL 66,1,1 | ||
7 701 . T A . . END=702 PL 77,1,1 | ||
7 703 . T A . . END=777 PL 77,1,2 | ||
8 1 . T A . . END=10 PL 88,1,1 |
Oops, something went wrong.