Permalink
Browse files

adding fine control over jexl expression behavior with missing values (

…#772)

* adding JexlMissingValueTreatment to control how values in a jexl expression that are missing in the evaluated context are treated
* adding overloads to VariantContextUtils.match to allow control of missing value behavior
* minor refactoring in JEXLMap

* making enum method package private

* responding to comments

* updated tests

* moving JexlMissingValueTreatment to top level

* fixing typo
  • Loading branch information...
1 parent e69aff0 commit 5a2d7a75b824aab483d201055ea70296a6b9ac10 @lbergelson lbergelson committed on GitHub Dec 9, 2016
@@ -5,61 +5,95 @@
import org.apache.commons.jexl2.JexlException;
import org.apache.commons.jexl2.MapContext;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
/**
* This is an implementation of a Map of {@link JexlVCMatchExp} to true or false values.
* It lazily initializes each value as requested to save as much processing time as possible.
*/
class JEXLMap implements Map<JexlVCMatchExp, Boolean> {
+ /**
+ * If a JEXL expression contains values that are not available in the given context, the default behavior is to
+ * treat that expression as a miss match.
+ */
+ public static final JexlMissingValueTreatment DEFAULT_MISSING_VALUE_TREATMENT = JexlMissingValueTreatment.TREAT_AS_MISMATCH;
+
// our variant context and/or Genotype
private final VariantContext vc;
private final Genotype g;
- // our context
- private JexlContext jContext = null;
+ private final JexlMissingValueTreatment howToTreatMissingValues;
/**
* our mapping from {@link JexlVCMatchExp} to {@link Boolean}s, which will be set to {@code NULL}
* for previously un-cached {@link JexlVCMatchExp}.
*/
- private Map<JexlVCMatchExp,Boolean> jexl;
+ private final Map<JexlVCMatchExp,Boolean> jexl;
- public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g) {
- initialize(jexlCollection);
+ // our context
+ private JexlContext jContext = null;
+
+ /**
+ * Construct a new JEXLMap which can evaluate expressions against a specific genotype and variant context
+ * @param jexlCollection collection of expressions to be evaluated
+ * @param vc VariantContext to evaluate expressions against
+ * @param g genotype to evaluate expressions against, may be null
+ * @param howToTreatMissingValues how missing values in vc and g should be treated
+ */
+ public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g, final JexlMissingValueTreatment howToTreatMissingValues) {
+ this.jexl = initializeMap(jexlCollection);
this.vc = vc;
this.g = g;
+ this.howToTreatMissingValues = howToTreatMissingValues;
}
+
+ /**
+ * Construct a new JEXLMap which can evaluate expressions against a specific genotype and variant context
+ * @param jexlCollection collection of expressions to be evaluated
+ * @param vc VariantContext to evaluate expressions against
+ * @param g genotype to evaluate expressions against, may be null
+ *
+ * missing values are treated as false
+ */
+ public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g) {
+ this(jexlCollection, vc, g, DEFAULT_MISSING_VALUE_TREATMENT);
+ }
+
+ /**
+ * Construct a new JEXLMap which can evaluate expressions against a specific VariantContext
+ * @param jexlCollection collection of expressions to be evaluated
+ * @param vc VariantContext to evaluate expressions against
+ *
+ * missing values are treated as non matches (false)
+ */
public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc) {
- this(jexlCollection, vc, null);
+ this(jexlCollection, vc, null, DEFAULT_MISSING_VALUE_TREATMENT);
}
/**
* Note: due to laziness, this accessor actually modifies the instance by possibly forcing evaluation of an Jexl expression.
*
- * @throws IllegalArgumentException when {@code o} is {@code null} or
+ * @throws IllegalArgumentException when {@code key} is {@code null} or
* when any of the JexlVCMatchExp (i.e. keys) contains invalid Jexl expressions.
*/
- public Boolean get(Object o) {
- if (o == null) {
+ public Boolean get(Object key) {
+ if (key == null) {
throw new IllegalArgumentException("Query key is null");
}
// if we've already determined the value, return it
- if (jexl.containsKey(o) && jexl.get(o) != null) {
- return jexl.get(o);
+ final Boolean value = jexl.get(key);
+ if (jexl.containsKey(key) && value != null) {
+ return value;
}
// otherwise cast the expression and try again
- final JexlVCMatchExp e = (JexlVCMatchExp) o;
- evaluateExpression(e);
- return jexl.get(e);
+ final JexlVCMatchExp exp = (JexlVCMatchExp) key;
+ final boolean matches = evaluateExpression(exp);
+ jexl.put(exp, matches);
+ return matches;
}
/**
@@ -87,9 +121,7 @@ public Boolean get(Object o) {
*/
public Collection<Boolean> values() {
for (final JexlVCMatchExp exp : jexl.keySet()) {
- if (jexl.get(exp) == null) {
- evaluateExpression(exp);
- }
+ jexl.computeIfAbsent(exp, k -> evaluateExpression(exp));
}
return jexl.values();
}
@@ -112,55 +144,60 @@ public void putAll(Map<? extends JexlVCMatchExp, ? extends Boolean> map) {
}
/**
- * Initializes all keys with null values indicating that they have not yet been evaluated.
+ * Initializes a map and give all keys with null values indicating that they have not yet been evaluated.
* The actual value will be computed only when the key is requested via {@link #get(Object)} or {@link #values()}.
+ *
+ * @return an initialized map of jexlExpression -> null
*/
- private void initialize(Collection<JexlVCMatchExp> jexlCollection) {
- jexl = new HashMap<>();
+ private static Map<JexlVCMatchExp,Boolean> initializeMap(final Collection<JexlVCMatchExp> jexlCollection) {
+ final Map<JexlVCMatchExp,Boolean> jexlMap = new HashMap<>(jexlCollection.size());
for (final JexlVCMatchExp exp: jexlCollection) {
- jexl.put(exp, null);
+ jexlMap.put(exp, null);
}
+
+ return jexlMap;
}
/**
* Evaluates a {@link JexlVCMatchExp}'s expression, given the current context (and setup the context if it's {@code null}).
*
* @param exp the {@link JexlVCMatchExp} to evaluate
- *
+ * @return true if the expression matched the context
* @throws IllegalArgumentException when {@code exp} is {@code null}, or
* when the Jexl expression in {@code exp} fails to evaluate the JexlContext
* constructed with the input VC or genotype.
*/
- private void evaluateExpression(final JexlVCMatchExp exp) {
+ private boolean evaluateExpression(final JexlVCMatchExp exp) {
// if the context is null, we need to create it to evaluate the JEXL expression
if (this.jContext == null) {
- createContext();
+ jContext = createContext();
}
try {
+ //TODO figure out of this can ever evaluate to null or if that isn't actually possible
final Boolean value = (Boolean) exp.exp.evaluate(jContext);
- // treat errors as no match
- jexl.put(exp, value == null ? false : value);
+ return value == null ? howToTreatMissingValues.getMissingValueOrExplode() : value;
} catch (final JexlException.Variable e) {
- // if exception happens because variable is undefined (i.e. field in expression is not present), evaluate to FALSE
- jexl.put(exp,false);
+ //this occurs when the jexl expression contained a literal that didn't match anything in the given context
+ return howToTreatMissingValues.getMissingValueOrExplode();
} catch (final JexlException e) {
// todo - might be better if no exception is caught here but let's user decide how to deal with them; note this will propagate to get() and values()
throw new IllegalArgumentException(String.format("Invalid JEXL expression detected for %s", exp.name), e);
}
}
/**
- * Create the internal JexlContext, only when required.
+ * Create a new JexlContext
* This code is where new JEXL context variables should get added.
+ * @return a new jexl context initialized appropriately
*/
- private void createContext() {
+ private JexlContext createContext() {
if (vc == null) {
- jContext = new MapContext(Collections.emptyMap());
+ return new MapContext(Collections.emptyMap());
} else if (g == null) {
- jContext = new VariantJEXLContext(vc);
+ return new VariantJEXLContext(vc);
} else {
- jContext = new GenotypeJEXLContext(vc, g);
+ return new GenotypeJEXLContext(vc, g);
}
}
@@ -181,7 +218,7 @@ public Boolean remove(Object o) {
public Set<Entry<JexlVCMatchExp, Boolean>> entrySet() {
- throw new UnsupportedOperationException("clear() not supported on a JEXLMap");
+ throw new UnsupportedOperationException("entrySet() not supported on a JEXLMap");
}
// nope
@@ -0,0 +1,39 @@
+package htsjdk.variant.variantcontext;
+
+import java.util.function.Supplier;
+
+/**
+ * How to treat values that appear in a jexl expression but are missing in the context it's applied to
+ */
+public enum JexlMissingValueTreatment {
+ /**
+ * Treat expressions with a missing value as a mismatch and evaluate to false
+ */
+ TREAT_AS_MISMATCH(() -> false),
+
+ /**
+ * Treat expressions with a missing value as a match and evaluate to true
+ */
+ TREAT_AS_MATCH(() -> true),
+
+ /**
+ * Treat expressions with a missing value as an error and throw an {@link IllegalArgumentException}
+ */
+ THROW(() -> {throw new IllegalArgumentException("Jexl Expression couldn't be evaluated because there was a missing value.");});
+
+ private final Supplier<Boolean> resultSupplier;
+
+ JexlMissingValueTreatment(final Supplier<Boolean> resultSupplier){
+ this.resultSupplier = resultSupplier;
+ }
+
+ /**
+ * get the missing value that corresponds to this option or throw an exception
+ * @return the value that should be used in case of a missing value
+ * @throws IllegalArgumentException if this should be treated as an error
+ */
+ boolean getMissingValueOrExplode(){
+ return resultSupplier.get();
+ }
+
+}
@@ -307,6 +307,7 @@ public static boolean match(VariantContext vc, JexlVCMatchExp exp) {
* This the best way to apply JEXL expressions to {@link VariantContext} records.
* Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions.
*
+ * Expressions that contain literals not available in the VariantContext or Genotype will be treated as not matching
* @param vc variant context
* @param exps expressions
* @return true if there is a match
@@ -324,7 +325,20 @@ public static boolean match(VariantContext vc, JexlVCMatchExp exp) {
* @return true if there is a match
*/
public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) {
- return match(vc,g, Collections.singletonList(exp)).get(exp);
+ return match(vc, g, Collections.singletonList(exp), JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT).get(exp);
+ }
+
+ /**
+ * Returns true if {@code exp} match {@code vc}, {@code g}.
+ * See {@link #match(VariantContext, Genotype, Collection)} for full docs.
+ * @param vc variant context
+ * @param g genotype
+ * @param exp expression
+ * @param howToTreatMissingValues what to do if the jexl expression contains literals that aren't in the context
+ * @return true if there is a match
+ */
+ public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp, JexlMissingValueTreatment howToTreatMissingValues) {
+ return match(vc, g, Collections.singletonList(exp), howToTreatMissingValues).get(exp);
}
/**
@@ -333,13 +347,30 @@ public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) {
* This the best way to apply JEXL expressions to {@link VariantContext} records.
* Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions.
*
+ * Expressions that contain literals not available in the VariantContext or Genotype will be treated as not matching
* @param vc variant context
* @param g genotype
* @param exps expressions
* @return true if there is a match
*/
public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Genotype g, Collection<JexlVCMatchExp> exps) {
- return new JEXLMap(exps,vc,g);
+ return match(vc, g, exps, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT);
+ }
+
+ /**
+ * Matches each {@link JexlVCMatchExp} exp against the data contained in {@code vc}, {@code g},
+ * and returns a map from these expressions to {@code true} (if they matched) or {@code false} (if they didn't).
+ * This the best way to apply JEXL expressions to {@link VariantContext} records.
+ * Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions.
+ *
+ * @param vc variant context
+ * @param g genotype
+ * @param exps expressions
+ * @param howToTreatMissingValues what to do if the jexl expression contains literals that aren't in the context
+ * @return true if there is a match
+ */
+ public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Genotype g, Collection<JexlVCMatchExp> exps, JexlMissingValueTreatment howToTreatMissingValues) {
+ return new JEXLMap(exps, vc, g, howToTreatMissingValues);
}
/**
@@ -31,14 +31,10 @@
import htsjdk.variant.vcf.VCFConstants;
import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
/**
@@ -55,6 +51,10 @@
private static final VariantContextUtils.JexlVCMatchExp exp
= new VariantContextUtils.JexlVCMatchExp("name", VariantContextUtils.engine.get().createExpression("QUAL > 500.0"));
+ private static final JexlVCMatchExp missingValueExpression = new VariantContextUtils.JexlVCMatchExp(
+ "Zis10", VariantContextUtils.engine.get().createExpression("Z==10"));
+
+
// SNP alleles: A[ref]/T[alt] at chr1:10. One (crappy) sample, one (bare minimum) VC.
private static final SimpleFeature eventLoc = new SimpleFeature("chr1", 10, 10);
private static final Allele Aref = Allele.create("A", true);
@@ -87,7 +87,45 @@ public void testGetValue() {
// eval our known expression
Assert.assertTrue(!jexlMap.get(exp));
}
-
+
+ @Test(dataProvider = "getMissingValueTestData")
+ public void testMissingBehaviorThroughMatch(VariantContext vc, JexlMissingValueTreatment missingValueTreatment, boolean expected, Class<? extends Exception> expectedException){
+ if(expectedException == null) {
+ Assert.assertEquals(VariantContextUtils.match(vc, null, missingValueExpression, missingValueTreatment), expected);
+ } else {
+ Assert.assertThrows(expectedException, () -> VariantContextUtils.match(vc, null, missingValueExpression, missingValueTreatment));
+ }
+ }
+
+ @Test(dataProvider = "getMissingValueTestData")
+ public void testMissingBehavior(VariantContext vc, JexlMissingValueTreatment missingValueTreatment, boolean expected, Class<? extends Exception> expectedException){
+ final JEXLMap jexlMap = new JEXLMap(Collections.singletonList(missingValueExpression), vc, null, missingValueTreatment);
+ if(expectedException == null) {
+ Assert.assertEquals((boolean) jexlMap.get(missingValueExpression), expected);
+ } else {
+ Assert.assertThrows(expectedException, () -> jexlMap.get(missingValueExpression));
+ }
+ }
+
+ @DataProvider
+ public Object[][] getMissingValueTestData(){
+ final List<Allele> alleles = Arrays.asList(Aref, Talt);
+ VariantContextBuilder vcb = new VariantContextBuilder("test", "chr1", 10, 10, alleles);
+ VariantContext noZ = vcb.make();
+ VariantContext hasZ = vcb.attribute("Z", 0).make();
+
+ return new Object[][]{
+ {noZ, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT, false, null},
+ {hasZ, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT, false, null}, //the value isn't missing but the expression is false
+ {noZ, JexlMissingValueTreatment.TREAT_AS_MATCH, true, null},
+ {hasZ, JexlMissingValueTreatment.TREAT_AS_MATCH, false, null}, //the value isn't missing but the expression is false
+ {noZ, JexlMissingValueTreatment.TREAT_AS_MISMATCH, false, null},
+ {hasZ, JexlMissingValueTreatment.TREAT_AS_MISMATCH, false, null},
+ {noZ, JexlMissingValueTreatment.THROW, false, IllegalArgumentException.class},
+ {hasZ, JexlMissingValueTreatment.THROW, false, null}
+ };
+ }
+
// Testing the new 'FT' and 'isPassFT' expressions in the JEXL map
@Test
public void testJEXLGenotypeFilters() {

0 comments on commit 5a2d7a7

Please sign in to comment.