adding ability to control jexl expression behavior when missing values #772
Merged
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
fd04b41
refactoring jexl
lbergelson 108bbe7
add ability to treat missing values differently
lbergelson d88c5c6
adding overloads to match to allow control of missing value behavior
lbergelson 15b77d2
making enum method package private
lbergelson 2d2d78e
responding to comments
lbergelson 85736cb
updated tests
lbergelson 4988b28
moving JexlMissingValueTreatment to top level
lbergelson 8509ba2
fixing typo
lbergelson
Jump to file or symbol
Failed to load files and symbols.
| @@ -5,61 +5,95 @@ | ||
| import org.apache.commons.jexl2.JexlException; | ||
| import org.apache.commons.jexl2.MapContext; | ||
| -import java.util.Collection; | ||
| -import java.util.Collections; | ||
| -import java.util.HashMap; | ||
| -import java.util.Map; | ||
| -import java.util.Set; | ||
| +import java.util.*; | ||
| /** | ||
| * This is an implementation of a Map of {@link JexlVCMatchExp} to true or false values. | ||
| * It lazily initializes each value as requested to save as much processing time as possible. | ||
| */ | ||
| class JEXLMap implements Map<JexlVCMatchExp, Boolean> { | ||
lbergelson
Contributor
|
||
| + /** | ||
| + * If a JEXL expression contains values that are not available in the given context, the default behavior is to | ||
| + * treat that expression as a miss match. | ||
| + */ | ||
| + public static final JexlMissingValueTreatment DEFAULT_MISSING_VALUE_TREATMENT = JexlMissingValueTreatment.TREAT_AS_MISMATCH; | ||
| + | ||
| // our variant context and/or Genotype | ||
| private final VariantContext vc; | ||
| private final Genotype g; | ||
| - // our context | ||
| - private JexlContext jContext = null; | ||
| + private final JexlMissingValueTreatment howToTreatMissingValues; | ||
| /** | ||
| * our mapping from {@link JexlVCMatchExp} to {@link Boolean}s, which will be set to {@code NULL} | ||
| * for previously un-cached {@link JexlVCMatchExp}. | ||
| */ | ||
| - private Map<JexlVCMatchExp,Boolean> jexl; | ||
| + private final Map<JexlVCMatchExp,Boolean> jexl; | ||
| - public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g) { | ||
| - initialize(jexlCollection); | ||
| + // our context | ||
| + private JexlContext jContext = null; | ||
| + | ||
| + /** | ||
| + * Construct a new JEXLMap which can evaluate expressions against a specific genotype and variant context | ||
| + * @param jexlCollection collection of expressions to be evaluated | ||
| + * @param vc VariantContext to evaluate expressions against | ||
| + * @param g genotype to evaluate expressions against, may be null | ||
| + * @param howToTreatMissingValues how missing values in vc and g should be treated | ||
| + */ | ||
| + public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g, final JexlMissingValueTreatment howToTreatMissingValues) { | ||
| + this.jexl = initializeMap(jexlCollection); | ||
| this.vc = vc; | ||
| this.g = g; | ||
| + this.howToTreatMissingValues = howToTreatMissingValues; | ||
| } | ||
| + | ||
| + /** | ||
| + * Construct a new JEXLMap which can evaluate expressions against a specific genotype and variant context | ||
| + * @param jexlCollection collection of expressions to be evaluated | ||
| + * @param vc VariantContext to evaluate expressions against | ||
| + * @param g genotype to evaluate expressions against, may be null | ||
| + * | ||
| + * missing values are treated as false | ||
| + */ | ||
| + public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g) { | ||
| + this(jexlCollection, vc, g, DEFAULT_MISSING_VALUE_TREATMENT); | ||
| + } | ||
| + | ||
| + /** | ||
| + * Construct a new JEXLMap which can evaluate expressions against a specific VariantContext | ||
| + * @param jexlCollection collection of expressions to be evaluated | ||
| + * @param vc VariantContext to evaluate expressions against | ||
| + * | ||
| + * missing values are treated as non matches (false) | ||
| + */ | ||
| public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc) { | ||
| - this(jexlCollection, vc, null); | ||
| + this(jexlCollection, vc, null, DEFAULT_MISSING_VALUE_TREATMENT); | ||
| } | ||
| /** | ||
| * Note: due to laziness, this accessor actually modifies the instance by possibly forcing evaluation of an Jexl expression. | ||
| * | ||
| - * @throws IllegalArgumentException when {@code o} is {@code null} or | ||
| + * @throws IllegalArgumentException when {@code key} is {@code null} or | ||
| * when any of the JexlVCMatchExp (i.e. keys) contains invalid Jexl expressions. | ||
| */ | ||
| - public Boolean get(Object o) { | ||
| - if (o == null) { | ||
| + public Boolean get(Object key) { | ||
| + if (key == null) { | ||
| throw new IllegalArgumentException("Query key is null"); | ||
| } | ||
| // if we've already determined the value, return it | ||
| - if (jexl.containsKey(o) && jexl.get(o) != null) { | ||
| - return jexl.get(o); | ||
| + final Boolean value = jexl.get(key); | ||
| + if (jexl.containsKey(key) && value != null) { | ||
| + return value; | ||
| } | ||
| // otherwise cast the expression and try again | ||
| - final JexlVCMatchExp e = (JexlVCMatchExp) o; | ||
| - evaluateExpression(e); | ||
| - return jexl.get(e); | ||
| + final JexlVCMatchExp exp = (JexlVCMatchExp) key; | ||
| + final boolean matches = evaluateExpression(exp); | ||
| + jexl.put(exp, matches); | ||
| + return matches; | ||
| } | ||
| /** | ||
| @@ -87,9 +121,7 @@ public Boolean get(Object o) { | ||
| */ | ||
| public Collection<Boolean> values() { | ||
| for (final JexlVCMatchExp exp : jexl.keySet()) { | ||
| - if (jexl.get(exp) == null) { | ||
| - evaluateExpression(exp); | ||
| - } | ||
| + jexl.computeIfAbsent(exp, k -> evaluateExpression(exp)); | ||
| } | ||
| return jexl.values(); | ||
| } | ||
| @@ -112,55 +144,60 @@ public void putAll(Map<? extends JexlVCMatchExp, ? extends Boolean> map) { | ||
| } | ||
| /** | ||
| - * Initializes all keys with null values indicating that they have not yet been evaluated. | ||
| + * Initializes a map and give all keys with null values indicating that they have not yet been evaluated. | ||
| * The actual value will be computed only when the key is requested via {@link #get(Object)} or {@link #values()}. | ||
| + * | ||
| + * @return an initialized map of jexlExpression -> null | ||
| */ | ||
| - private void initialize(Collection<JexlVCMatchExp> jexlCollection) { | ||
| - jexl = new HashMap<>(); | ||
| + private static Map<JexlVCMatchExp,Boolean> initializeMap(final Collection<JexlVCMatchExp> jexlCollection) { | ||
| + final Map<JexlVCMatchExp,Boolean> jexlMap = new HashMap<>(jexlCollection.size()); | ||
| for (final JexlVCMatchExp exp: jexlCollection) { | ||
| - jexl.put(exp, null); | ||
| + jexlMap.put(exp, null); | ||
| } | ||
| + | ||
| + return jexlMap; | ||
| } | ||
| /** | ||
| * Evaluates a {@link JexlVCMatchExp}'s expression, given the current context (and setup the context if it's {@code null}). | ||
| * | ||
| * @param exp the {@link JexlVCMatchExp} to evaluate | ||
| - * | ||
| + * @return true if the expression matched the context | ||
| * @throws IllegalArgumentException when {@code exp} is {@code null}, or | ||
| * when the Jexl expression in {@code exp} fails to evaluate the JexlContext | ||
| * constructed with the input VC or genotype. | ||
| */ | ||
| - private void evaluateExpression(final JexlVCMatchExp exp) { | ||
| + private boolean evaluateExpression(final JexlVCMatchExp exp) { | ||
|
|
||
| // if the context is null, we need to create it to evaluate the JEXL expression | ||
| if (this.jContext == null) { | ||
| - createContext(); | ||
| + jContext = createContext(); | ||
| } | ||
| try { | ||
| + //TODO figure out of this can ever evaluate to null or if that isn't actually possible | ||
| final Boolean value = (Boolean) exp.exp.evaluate(jContext); | ||
| - // treat errors as no match | ||
| - jexl.put(exp, value == null ? false : value); | ||
| + return value == null ? howToTreatMissingValues.getMissingValueOrExplode() : value; | ||
| } catch (final JexlException.Variable e) { | ||
| - // if exception happens because variable is undefined (i.e. field in expression is not present), evaluate to FALSE | ||
| - jexl.put(exp,false); | ||
| + //this occurs when the jexl expression contained a literal that didn't match anything in the given context | ||
| + return howToTreatMissingValues.getMissingValueOrExplode(); | ||
| } catch (final JexlException e) { | ||
| // todo - might be better if no exception is caught here but let's user decide how to deal with them; note this will propagate to get() and values() | ||
| throw new IllegalArgumentException(String.format("Invalid JEXL expression detected for %s", exp.name), e); | ||
| } | ||
| } | ||
| /** | ||
| - * Create the internal JexlContext, only when required. | ||
| + * Create a new JexlContext | ||
| * This code is where new JEXL context variables should get added. | ||
| + * @return a new jexl context initialized appropriately | ||
| */ | ||
| - private void createContext() { | ||
| + private JexlContext createContext() { | ||
| if (vc == null) { | ||
| - jContext = new MapContext(Collections.emptyMap()); | ||
| + return new MapContext(Collections.emptyMap()); | ||
| } else if (g == null) { | ||
| - jContext = new VariantJEXLContext(vc); | ||
| + return new VariantJEXLContext(vc); | ||
| } else { | ||
| - jContext = new GenotypeJEXLContext(vc, g); | ||
| + return new GenotypeJEXLContext(vc, g); | ||
| } | ||
| } | ||
| @@ -181,7 +218,7 @@ public Boolean remove(Object o) { | ||
| public Set<Entry<JexlVCMatchExp, Boolean>> entrySet() { | ||
| - throw new UnsupportedOperationException("clear() not supported on a JEXLMap"); | ||
| + throw new UnsupportedOperationException("entrySet() not supported on a JEXLMap"); | ||
| } | ||
| // nope | ||
| @@ -0,0 +1,39 @@ | ||
| +package htsjdk.variant.variantcontext; | ||
| + | ||
| +import java.util.function.Supplier; | ||
| + | ||
| +/** | ||
| + * How to treat values that appear in a jexl expression but are missing in the context it's applied to | ||
| + */ | ||
| +public enum JexlMissingValueTreatment { | ||
| + /** | ||
| + * Treat expressions with a missing value as a mismatch and evaluate to false | ||
| + */ | ||
| + TREAT_AS_MISMATCH(() -> false), | ||
| + | ||
| + /** | ||
| + * Treat expressions with a missing value as a match and evaluate to true | ||
| + */ | ||
| + TREAT_AS_MATCH(() -> true), | ||
| + | ||
| + /** | ||
| + * Treat expressions with a missing value as an error and throw an {@link IllegalArgumentException} | ||
| + */ | ||
| + THROW(() -> {throw new IllegalArgumentException("Jexl Expression couldn't be evaluated because there was a missing value.");}); | ||
| + | ||
| + private final Supplier<Boolean> resultSupplier; | ||
| + | ||
| + JexlMissingValueTreatment(final Supplier<Boolean> resultSupplier){ | ||
| + this.resultSupplier = resultSupplier; | ||
| + } | ||
| + | ||
| + /** | ||
| + * get the missing value that corresponds to this option or throw an exception | ||
| + * @return the value that should be used in case of a missing value | ||
| + * @throws IllegalArgumentException if this should be treated as an error | ||
| + */ | ||
| + boolean getMissingValueOrExplode(){ | ||
| + return resultSupplier.get(); | ||
| + } | ||
| + | ||
| +} |
This class
JEXLMapappears to have no direct unit testing at all. Can you add aJEXLMapUnitTestthat at least covers the three different missing value behaviors?