Skip to content

Commit

Permalink
Breaking change in filtering expressions on FILTER column.
Browse files Browse the repository at this point in the history
Previously only subset matching was supported; now both subset
and exact matches can be queried. For example:

    FILTER="PASS"
    FILTER="A"          .. exact match, for example "A;B" does not pass
    FILTER!="A"         .. exact match, for example "A;B" does pass
    FILTER~"A"          .. both "A" and "A;B" pass
    FILTER!~"A"         .. neither "A" nor "A;B" pass

Resolves #1285
  • Loading branch information
pd3 committed Aug 21, 2020
1 parent 75ed6f9 commit fea8773
Show file tree
Hide file tree
Showing 9 changed files with 70 additions and 7 deletions.
9 changes: 9 additions & 0 deletions NEWS
Expand Up @@ -6,6 +6,15 @@
functionality was lost (plotting and clustering) but may be added
back on popular demand.

* Breaking change in -i/-e expressions on the FILTER column. Previously
it was only possible to test whether a filter was among those set (a subset
match), but not to require an exact match. The new behavior is:

FILTER="A" .. exact match, for example "A;B" does not pass
FILTER!="A" .. exact match, for example "A;B" does pass
FILTER~"A" .. both "A" and "A;B" pass
FILTER!~"A" .. neither "A" nor "A;B" pass

## Release 1.10.2 (19th December 2019)

This is a release fix that corrects minor inconsistencies discovered in
Expand Down
8 changes: 8 additions & 0 deletions doc/bcftools.txt
Expand Up @@ -2970,6 +2970,14 @@ about the distinction between "&&" vs "&" and "||" vs "|".
FORMAT/DV, FMT/DV, or DV
FILTER, QUAL, ID, CHROM, POS, REF, ALT[0]

* starting with 1.10.3, the FILTER column can be queried as follows:

FILTER="PASS"
FILTER="A" .. exact match, for example "A;B" does not pass
FILTER!="A" .. negated exact match, for example "A;B" does pass
FILTER~"A" .. both "A" and "A;B" pass
FILTER!~"A" .. neither "A" nor "A;B" pass

* 1 (or 0) to test the presence (or absence) of a flag

FlagA=1 && FlagB=0
Expand Down
38 changes: 31 additions & 7 deletions filter.c
Expand Up @@ -165,6 +165,8 @@ struct _filter_t
#define TOK_sMEDIAN 35
#define TOK_sSTDEV 36
#define TOK_sSUM 37
#define TOK_IN 38 // contains, e.g. FILTER~"A"
#define TOK_NOT_IN 39 // does not contain, e.g. FILTER!~"A"

// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
// ( ) [ < = > ] ! | & + - * / M m a A O ~ ^ S . l f c p b P i s
Expand Down Expand Up @@ -438,7 +440,7 @@ static void filters_cmp_bit_and(token_t *atok, token_t *btok, token_t *rtok, bcf
static void filters_cmp_filter(token_t *atok, token_t *btok, token_t *rtok, bcf1_t *line)
{
int i;
if ( rtok->tok_type==TOK_NE ) // AND logic: none of the filters can match
if ( rtok->tok_type==TOK_NOT_IN )
{
if ( !line->d.n_flt )
{
Expand All @@ -451,7 +453,7 @@ static void filters_cmp_filter(token_t *atok, token_t *btok, token_t *rtok, bcf1
rtok->pass_site = 1;
return;
}
else if ( rtok->tok_type==TOK_EQ ) // OR logic: at least one of the filters must match
else if ( rtok->tok_type==TOK_IN )
{
if ( !line->d.n_flt )
{
Expand All @@ -462,8 +464,30 @@ static void filters_cmp_filter(token_t *atok, token_t *btok, token_t *rtok, bcf1
if ( atok->hdr_id==line->d.flt[i] ) { rtok->pass_site = 1; return; }
return;
}
else if ( rtok->tok_type==TOK_NE ) // exact match
{
if ( !line->d.n_flt )
{
if ( atok->hdr_id==-1 ) return; // missing value
rtok->pass_site = 1;
return; // no filter present, eval to true
}
if ( line->d.n_flt==1 && atok->hdr_id==line->d.flt[0] ) return; // exact match, fail iff a single matching value is present
rtok->pass_site = 1;
return;
}
else if ( rtok->tok_type==TOK_EQ ) // exact match, pass iff a single matching value is present
{
if ( !line->d.n_flt )
{
if ( atok->hdr_id==-1 ) { rtok->pass_site = 1; return; }
return; // no filter present, eval to false
}
if ( line->d.n_flt==1 && atok->hdr_id==line->d.flt[0] ) rtok->pass_site = 1;
return;
}
else
error("Only == and != operators are supported for FILTER\n");
error("Only ==, !=, ~, and !~ operators are supported for FILTER\n");
return;
}
static void filters_cmp_id(token_t *atok, token_t *btok, token_t *rtok, bcf1_t *line)
Expand Down Expand Up @@ -3352,11 +3376,11 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
if ( i+1==nout ) error("Could not parse the expression: %s\n", filter->str);
int itok = i, ival;
if ( out[i+1].tok_type==TOK_EQ || out[i+1].tok_type==TOK_NE ) ival = i - 1;
else if ( out[i+1].tok_type==TOK_LIKE ) out[i+1].tok_type = TOK_EQ, ival = i - 1;
else if ( out[i+1].tok_type==TOK_NLIKE ) out[i+1].tok_type = TOK_NE, ival = i - 1;
else if ( out[i+1].tok_type==TOK_LIKE ) out[i+1].tok_type = TOK_IN, ival = i - 1;
else if ( out[i+1].tok_type==TOK_NLIKE ) out[i+1].tok_type = TOK_NOT_IN, ival = i - 1;
else if ( out[i+2].tok_type==TOK_EQ || out[i+2].tok_type==TOK_NE ) ival = ++i;
else if ( out[i+2].tok_type==TOK_LIKE ) out[i+2].tok_type = TOK_EQ, ival = ++i;
else if ( out[i+2].tok_type==TOK_NLIKE ) out[i+2].tok_type = TOK_NE, ival = ++i;
else if ( out[i+2].tok_type==TOK_LIKE ) out[i+2].tok_type = TOK_IN, ival = ++i;
else if ( out[i+2].tok_type==TOK_NLIKE ) out[i+2].tok_type = TOK_NOT_IN, ival = ++i;
else error("[%s:%d %s] Could not parse the expression: %s\n", __FILE__,__LINE__,__FUNCTION__, filter->str);
if ( out[ival].tok_type!=TOK_VAL || !out[ival].key )
error("[%s:%d %s] Could not parse the expression, an unquoted string value perhaps? %s\n", __FILE__,__LINE__,__FUNCTION__, filter->str);
Expand Down
10 changes: 10 additions & 0 deletions test/filter.11.vcf
@@ -0,0 +1,10 @@
##fileformat=VCFv4.1
##contig=<ID=1,assembly=b37,length=249250621>
##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
##FILTER=<ID=A,Description="Filter AA">
##FILTER=<ID=AB,Description="Filter BB">
#CHROM POS ID REF ALT QUAL FILTER INFO
1 301 . C T . PASS .
1 302 . C T . A .
1 303 . C T . AB .
1 304 . C T . A;AB .
1 change: 1 addition & 0 deletions test/query.76.out
@@ -0,0 +1 @@
302 A
3 changes: 3 additions & 0 deletions test/query.77.out
@@ -0,0 +1,3 @@
301 PASS
303 AB
304 A;AB
2 changes: 2 additions & 0 deletions test/query.78.out
@@ -0,0 +1,2 @@
302 A
304 A;AB
2 changes: 2 additions & 0 deletions test/query.79.out
@@ -0,0 +1,2 @@
301 PASS
303 AB
4 changes: 4 additions & 0 deletions test/test.pl
Expand Up @@ -134,6 +134,10 @@
test_vcf_query($opts,in=>'missing',out=>'query.23.out',args=>q[-e'ISTR="."' -f'%POS %ISTR\\n']);
test_vcf_query($opts,in=>'missing',out=>'query.22.out',args=>q[-e'ISTR!="."' -f'%POS %ISTR\\n']);
test_vcf_query($opts,in=>'missing',out=>'query.24.out',args=>q[-i'FILTER="q11"' -f'%POS %ISTR\\n']);
test_vcf_query($opts,in=>'filter.11',out=>'query.76.out',args=>q[-i'FILTER="A"' -f'%POS %FILTER\\n']);
test_vcf_query($opts,in=>'filter.11',out=>'query.77.out',args=>q[-i'FILTER!="A"' -f'%POS %FILTER\\n']);
test_vcf_query($opts,in=>'filter.11',out=>'query.78.out',args=>q[-i'FILTER~"A"' -f'%POS %FILTER\\n']);
test_vcf_query($opts,in=>'filter.11',out=>'query.79.out',args=>q[-i'FILTER!~"A"' -f'%POS %FILTER\\n']);
test_vcf_query($opts,in=>'query',out=>'query.25.out',args=>q[-f'%LINE']);
test_vcf_query($opts,in=>'query.filter-type',out=>'query.26.out',args=>q[-f'%POS\\t%REF\\t%ALT\\n' -i'type="snp"']);
test_vcf_query($opts,in=>'query.filter-type',out=>'query.27.out',args=>q[-f'%POS\\t%REF\\t%ALT\\n' -i'type~"snp"']);
Expand Down

0 comments on commit fea8773

Please sign in to comment.