Skip to content

Commit

Permalink
bcftools merge overhaul from @pd3 fork
Browse files Browse the repository at this point in the history
Major overhaul of merge to accommodate merging of gvcf files
produced by the new bcftools mpileup.

The update also closes a number of long-standing issues.

Closes #412, #408, #361, #296 and possibly resolves #401

[NEWS] Major overhaul of `bcftools merge` to allow merging
       of gvcf files produced by `bcftools mpileup`
  • Loading branch information
mcshane committed Jul 22, 2016
1 parent 66b6e76 commit ddbda16
Show file tree
Hide file tree
Showing 14 changed files with 925 additions and 343 deletions.
2 changes: 1 addition & 1 deletion Makefile
Expand Up @@ -149,7 +149,7 @@ vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_
vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h)
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h)
vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h)
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) vcmp.h $(HTSDIR)/htslib/khash.h
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_regidx_h) $(bcftools_h) vcmp.h $(HTSDIR)/htslib/khash.h
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(bcftools_h) rbuf.h
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h)
vcfroh.o: vcfroh.c $(roh_h)
Expand Down
30 changes: 30 additions & 0 deletions test/gvcf.merge.1.out
@@ -0,0 +1,30 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##FILTER=<ID=LowGQX,Description="Locus GQX is less than 30 or not present">
##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.3">
##FILTER=<ID=nc,Description="No-call">
##contig=<ID=chrY,length=59373566,assembly=B37,md5=1e86411d73e6f00a10590f976be01623,species="Homo sapiens">
##contig=<ID=chrM,length=16569,assembly=B37,md5=c68f52674c9fb33aef52dcf399755519,species="Homo sapiens">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=EHQ,Number=2,Type=Integer,Description="Haplotype Quality, Equal Allele Fraction Assumption">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping">
##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=MATEID,Number=1,Type=String,Description="ID of mate breakend">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele">
##INFO=<ID=CGA_XR,Number=A,Type=String,Description="Per-ALT external database reference (dbSNP, COSMIC, etc)">
##INFO=<ID=CGA_BF,Number=1,Type=Float,Description="Frequency in baseline">
##INFO=<ID=CGA_FI,Number=A,Type=String,Description="Functional impact annotation">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant site block. All sites in a block are constrained to be non-variant, have the same filter value, and have all sample values in range [x,y], y <= max(x+3,(x*1.3)). All printed site block sample values are the minimum observed in the region spanned by the block">
##contig=<ID=chr1>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT G06 D05 H09
chr1 10106 . C . 0 LowGQX BLOCKAVG_min30p3a;AN=2 GT:GQX:DP:DPF ./.:.:.:. 0/0:12:5:0 ./.:.:.:.
chr1 10107 . C . 0 LowGQX;HighDPFRatio BLOCKAVG_min30p3a;AN=4 GT:GQX:DP:DPF .:.:0:1 0/0:12:5:0 0/0:5:2:0
chr1 10108 . N . 0 LowGQX;HighDPFRatio END=10110;BLOCKAVG_min30p3a;AN=2 GT:GQX:DP:DPF .:.:0:1 ./.:.:.:. 0/0:5:2:0
chr1 10111 . N . 0 LowGQX END=10120;BLOCKAVG_min30p3a;AN=2 GT:GQX:DP:DPF ./.:.:.:. ./.:.:.:. 0/0:5:2:0
26 changes: 26 additions & 0 deletions test/gvcf.merge.1.vcf
@@ -0,0 +1,26 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##FILTER=<ID=LowGQX,Description="Locus GQX is less than 30 or not present">
##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.3">
##FILTER=<ID=nc,Description="No-call">
##contig=<ID=chrY,length=59373566,assembly=B37,md5=1e86411d73e6f00a10590f976be01623,species="Homo sapiens">
##contig=<ID=chrM,length=16569,assembly=B37,md5=c68f52674c9fb33aef52dcf399755519,species="Homo sapiens">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=EHQ,Number=2,Type=Integer,Description="Haplotype Quality, Equal Allele Fraction Assumption">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping">
##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=MATEID,Number=1,Type=String,Description="ID of mate breakend">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele">
##INFO=<ID=CGA_XR,Number=A,Type=String,Description="Per-ALT external database reference (dbSNP, COSMIC, etc)">
##INFO=<ID=CGA_BF,Number=1,Type=Float,Description="Frequency in baseline">
##INFO=<ID=CGA_FI,Number=A,Type=String,Description="Functional impact annotation">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant site block. All sites in a block are constrained to be non-variant, have the same filter value, and have all sample values in range [x,y], y <= max(x+3,(x*1.3)). All printed site block sample values are the minimum observed in the region spanned by the block">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT G06
chr1 10107 . C . 0.00 LowGQX;HighDPFRatio END=10110;BLOCKAVG_min30p3a GT:GQX:DP:DPF .:.:0:1
26 changes: 26 additions & 0 deletions test/gvcf.merge.2.vcf
@@ -0,0 +1,26 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##FILTER=<ID=LowGQX,Description="Locus GQX is less than 30 or not present">
##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.3">
##FILTER=<ID=nc,Description="No-call">
##contig=<ID=chrY,length=59373566,assembly=B37,md5=1e86411d73e6f00a10590f976be01623,species="Homo sapiens">
##contig=<ID=chrM,length=16569,assembly=B37,md5=c68f52674c9fb33aef52dcf399755519,species="Homo sapiens">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=EHQ,Number=2,Type=Integer,Description="Haplotype Quality, Equal Allele Fraction Assumption">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping">
##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=MATEID,Number=1,Type=String,Description="ID of mate breakend">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele">
##INFO=<ID=CGA_XR,Number=A,Type=String,Description="Per-ALT external database reference (dbSNP, COSMIC, etc)">
##INFO=<ID=CGA_BF,Number=1,Type=Float,Description="Frequency in baseline">
##INFO=<ID=CGA_FI,Number=A,Type=String,Description="Functional impact annotation">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant site block. All sites in a block are constrained to be non-variant, have the same filter value, and have all sample values in range [x,y], y <= max(x+3,(x*1.3)). All printed site block sample values are the minimum observed in the region spanned by the block">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT D05
chr1 10106 . C . 0.00 LowGQX END=10107;BLOCKAVG_min30p3a GT:GQX:DP:DPF 0/0:12:5:0
26 changes: 26 additions & 0 deletions test/gvcf.merge.3.vcf
@@ -0,0 +1,26 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##FILTER=<ID=LowGQX,Description="Locus GQX is less than 30 or not present">
##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.3">
##FILTER=<ID=nc,Description="No-call">
##contig=<ID=chrY,length=59373566,assembly=B37,md5=1e86411d73e6f00a10590f976be01623,species="Homo sapiens">
##contig=<ID=chrM,length=16569,assembly=B37,md5=c68f52674c9fb33aef52dcf399755519,species="Homo sapiens">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=EHQ,Number=2,Type=Integer,Description="Haplotype Quality, Equal Allele Fraction Assumption">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping">
##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=MATEID,Number=1,Type=String,Description="ID of mate breakend">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele">
##INFO=<ID=CGA_XR,Number=A,Type=String,Description="Per-ALT external database reference (dbSNP, COSMIC, etc)">
##INFO=<ID=CGA_BF,Number=1,Type=Float,Description="Frequency in baseline">
##INFO=<ID=CGA_FI,Number=A,Type=String,Description="Functional impact annotation">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant site block. All sites in a block are constrained to be non-variant, have the same filter value, and have all sample values in range [x,y], y <= max(x+3,(x*1.3)). All printed site block sample values are the minimum observed in the region spanned by the block">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT H09
chr1 10107 . C . 0.00 LowGQX END=10120;BLOCKAVG_min30p3a GT:GQX:DP:DPF 0/0:5:2:0
4 changes: 2 additions & 2 deletions test/merge.2.both.out
Expand Up @@ -21,12 +21,12 @@
##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B 2:A 2:B
1 3000000 . C CCG 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245 ./.:. ./.:.
1 3000000 . C G 59.2 PASS AN=4;AC=2 GT:GQ ./.:. ./.:. 0/1:245 0/1:245
1 3000000 . C CCG 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245 ./.:. ./.:.
1 3000150 . C A,G 59.2 PASS AN=8;AC=2,2 GT:GQ 0/1:245 0/1:245 0/2:245 0/2:245
1 3000151 . C A,G 59.2 PASS AN=8;AC=2,2 GT:DP:GQ 0/1:32:245 0/1:32:245 0/2:32:245 0/2:32:245
1 3106154 . C CC,CCC 342 PASS AN=8;AC=2,2 GT:GQ:DP 0/1:245:32 0/1:245:32 0/2:245:32 0/2:245:32
1 3106154 . C A,T 59.2 PASS AN=8;AC=2,2 GT:GQ:DP 0/1:245:32 0/1:245:32 0/2:245:32 0/2:245:32
1 3106154 . C CC,CCC 342 PASS AN=8;AC=2,2 GT:GQ:DP 0/1:245:32 0/1:245:32 0/2:245:32 0/2:245:32
1 3200000 . C T 59.2 PASS AN=8;AC=4 GT:GQ:DP 0/1:245:32 0/1:245:32 0/1:245:32 0/1:245:32
1 3200010 . C T,A 59.2 PASS AN=8;AC=2,2 GT:GQ:DP 0/1:245:32 0/1:245:32 0/2:245:32 0/2:245:32
1 3200020 . C G,T 59.2 PASS AN=0;AC=0,0 GT:GL ./.:1,2,3,4,5,6 .:1,2,3 ./.:1,2,3,4,5,6 .:1,2,3
6 changes: 3 additions & 3 deletions test/merge.2.none.out
Expand Up @@ -21,16 +21,16 @@
##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B 2:A 2:B
1 3000000 . C CCG 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245 ./.:. ./.:.
1 3000000 . C G 59.2 PASS AN=4;AC=2 GT:GQ ./.:. ./.:. 0/1:245 0/1:245
1 3000000 . C CCG 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245 ./.:. ./.:.
1 3000150 . C A 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245 ./.:. ./.:.
1 3000150 . C G 59.2 PASS AN=4;AC=2 GT:GQ ./.:. ./.:. 0/1:245 0/1:245
1 3000151 . C A 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245 ./.:.:. ./.:.:.
1 3000151 . C G 59.2 PASS AN=4;AC=2 GT:DP:GQ ./.:.:. ./.:.:. 0/1:32:245 0/1:32:245
1 3106154 . C CC 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32 ./.:.:. ./.:.:.
1 3106154 . C A 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32 ./.:.:. ./.:.:.
1 3106154 . C CCC 342 PASS AN=4;AC=2 GT:GQ:DP ./.:.:. ./.:.:. 0/1:245:32 0/1:245:32
1 3106154 . C T 59.2 PASS AN=4;AC=2 GT:GQ:DP ./.:.:. ./.:.:. 0/1:245:32 0/1:245:32
1 3106154 . C CC 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32 ./.:.:. ./.:.:.
1 3106154 . C CCC 342 PASS AN=4;AC=2 GT:GQ:DP ./.:.:. ./.:.:. 0/1:245:32 0/1:245:32
1 3200000 . C T 59.2 PASS AN=8;AC=4 GT:GQ:DP 0/1:245:32 0/1:245:32 0/1:245:32 0/1:245:32
1 3200010 . C T,A 59.2 PASS AN=8;AC=2,2 GT:GQ:DP 0/1:245:32 0/1:245:32 0/2:245:32 0/2:245:32
1 3200020 . C G,T 59.2 PASS AN=0;AC=0,0 GT:GL ./.:1,2,3,4,5,6 .:1,2,3 ./.:1,2,3,4,5,6 .:1,2,3
9 changes: 4 additions & 5 deletions test/merge.abc.out
Expand Up @@ -38,18 +38,17 @@
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B 2:B C D
1 3000150 . C T 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245 ./.:. ./.:. ./.:.
1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245 ./.:.:. ./.:.:. ./.:.:.
1 3062915 idSNP G T,C,A 419 test;q20 TEST=5;STR=.;DP=14;DP4=3,6,9,12;INTA=2,1,.;AN=9;AC=2,2,1 GT:TT:GQ:DP:GL:STR 0/1:0,1,.:409:35:-20,-5,-20,-20,-5,-20,.,.,.,.:. 2:0,1,.:409:35:-20,-5,-20,.:. 0/3:.:376:14:-10,.,.,.,.,.,0,.,.,-10:ABC 0/2:1,0,.:409:35:-20,-20,-20,-5,-5,-20,.,.,.,.:. 0/1:1,0,.:409:35:-20,-20,-20,-5,-5,-20,.,.,.,.:.
1 3062915 id3D GTTT G 84.6 q10;q20 INDEL;STR=test;TXT=AA;DP=1013;DP4=6,7,8,9;AN=10;AC=5 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20 0/1:376:14:-10,0,-10 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
1 3062915 id1D;id2D GTT GT,G 999 q20;q10 DP=14;DP4=2,4,6,8;AN=6;AC=1,2 GT:GQ:DP:GL:STR ./.:.:.:.:. ./.:.:.:.:. 0/1:376:14:-10,0,-10,.,.,.:DEF 0/2:409:35:-20,.,.,-5,.,-20:. 0/2:409:35:-20,.,.,-5,.,-20:.
1 3062915 idSNP G T,C,A 419 test;q20 TEST=5;STR=.;DP=14;DP4=3,6,9,12;INTA=2,1,.;AN=9;AC=2,2,1 GT:TT:GQ:DP:GL:STR 0/1:0,1,.:409:35:-20,-5,-20,-20,-5,-20,.,.,.,.:. 2:0,1,.:409:35:-20,-5,-20,.:. 0/3:.:376:14:-10,.,.,.,.,.,0,.,.,-10:ABC 0/2:1,0,.:409:35:-20,-20,-20,-5,-5,-20,.,.,.,.:. 0/1:1,0,.:409:35:-20,-20,-20,-5,-5,-20,.,.,.,.:.
1 3106154 . C T 999 PASS DP=15;AN=2;AC=1 GT:GQ:DP:GL ./.:.:.:. ./.:.:.:. 0/1:277:15:-10,0,-10 ./.:.:.:. ./.:.:.:.
1 3106154 . CAAAA CA,C 342 PASS DP=15;AN=6;AC=2,1 GT:GQ:DP:GL 0/1:245:32:. 0/1:245:32:. 0/2:277:15:-10,.,.,0,.,-10 .:245:32:. ./.:245:32:.
1 3106154 . C CT 459 PASS AN=8;AC=4 GT:GQ:DP 0/1:245:32 0/1:245:32 ./.:.:. 0/1:245:32 0/1:245:32
1 3106154 . C T 999 PASS DP=15;AN=2;AC=1 GT:GQ:DP:GL ./.:.:.:. ./.:.:.:. 0/1:277:15:-10,0,-10 ./.:.:.:. ./.:.:.:.
1 3157410 . GAC GC,G 90.6 q10 DP=11;AN=6;AC=4,1 GT:GQ:DP 1/1:21:21 1/1:21:21 0/2:49:11 ./.:.:. ./.:.:.
1 3157410 . G T 46.7 q10 AN=4;AC=4 GT:GQ:DP ./.:.:. ./.:.:. ./.:.:. 1/1:21:21 1/1:21:21
1 3157410 . GAC GC,G 90.6 q10 DP=11;AN=6;AC=4,1 GT:GQ:DP 1/1:21:21 1/1:21:21 0/2:49:11 ./.:.:. ./.:.:.
1 3162006 . GAA G,GA 238 PASS DP=19;XRF=1e+06,2e+06,500000;XRI=1111,2222,5555;XRS=AAA,BBB,DDD;XAF=1e+06,500000;XAI=1111,5555;XAS=AAA,DDD;XGF=1e+06,2e+06,3e+06,500000,.,9e+09;XGI=1111,2222,3333,5555,.,9999;XGS=A,B,C,E,.,F;AN=10;AC=3,2 GT:GQ:DP 0/1:212:22 0/1:212:22 0/1:589:19 0/2:212:22 0/2:212:22
1 3177144 . G T 999 PASS DP=24;AN=10;AC=3 GT:GQ:DP 0/0:150:30 1/1:150:30 0/1:236:24 0/0:150:30 0/0:150:30
1 3177144 . G . 45 PASS AN=4 GT:GQ:DP 0/0:150:30 0/0:150:30 ./.:.:. ./.:.:. ./.:.:.
1 3177144 . GT G 999 PASS DP=24;AN=2;AC=1 GT:GQ:DP ./.:.:. ./.:.:. 0/1:236:24 ./.:.:. ./.:.:.
1 3177144 . GT G 999 PASS DP=24;AN=6;AC=1 GT:GQ:DP 0/0:150:30 0/0:150:30 0/1:236:24 ./.:.:. ./.:.:.
1 3184885 . TAAAA TA,T 61.5 PASS DP=16;AN=10;AC=5,4 GT:GQ:DP 1/2:12:10 1/2:12:10 0/1:435:16 1/2:12:10 1/2:12:10
2 3188209 . GA G 41.5 . DP=15;AN=2;AC=1 GT:GQ:DP ./.:.:. ./.:.:. 0/1:162:15 ./.:.:. ./.:.:.
2 3199812 . G GTT,GT 291 PASS AN=8;AC=4,4 GT:GQ:DP 1/2:322:26 1/2:322:26 ./.:.:. 1/2:322:26 1/2:322:26
Expand Down
33 changes: 33 additions & 0 deletions test/merge.gvcf.2.a.vcf
@@ -0,0 +1,33 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
##contig=<ID=3,length=243199373>
##contig=<ID=2,length=243199373>
##contig=<ID=1,length=243199373>
##contig=<ID=4,length=243199373>
##contig=<ID=8,length=243199373>
##contig=<ID=5,length=243199373>
##contig=<ID=6,length=243199373>
##contig=<ID=7,length=243199373>
##INFO=<ID=QS,Number=R,Type=Float,Description="Auxiliary tag used for calling">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality bases">
##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of high-quality non-reference bases">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=MinDP,Number=1,Type=Integer,Description="Minimum per-sample depth in this gVCF block">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT AAA
2 21444416 . G <*> . . END=21444429;MinDP=5;QS=1,0 PL:DP 0,15,125:5
2 21444430 . TCAA T,TAA 0 . QS=0.603659,0.304878,0.0914634 PL:DP:DV 37,0,79,35,73,113:5:2
2 21444431 . C <*> . . MinDP=4;QS=1,0 PL:DP 0,12,110:4
2 21444431 . CA C 0 . QS=0.75,0.25 PL:DP:DV 0,4,10:4:1
2 21444433 . C <*> 0 . END=21444444;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1
3 1 . C <*> 0 . END=10;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1
1 1619670 . C <*> 0 . END=1619877;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1
4 20000975 . C <*> 0 . END=20001070;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1
4 20001071 . T G,<*> 0 . . PL:DP:DV 0,4,10:4:1
5 110285 . TAACCCC T . . . PL 89,6,0
5 1110285 . T TAACCCC . . . PL 89,6,0
6 600 . T A . . END=666 PL 66,1,1
7 701 . T A . . END=702 PL 77,1,1
7 703 . T A . . END=777 PL 77,1,2
8 1 . T A . . END=10 PL 88,1,1

0 comments on commit ddbda16

Please sign in to comment.