Skip to content

Commit

Permalink
Fix optional CoordinationPatterns - both conjunctions and disjunction…
Browse files Browse the repository at this point in the history
…s would infinite loop, as the matcher would accept a failed match even after a previous success (meaning it would always be willing to accept the same failed match again). #1375
  • Loading branch information
AngledLuffa committed Jul 17, 2023
1 parent 50f0985 commit cc7983e
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 4 deletions.
49 changes: 45 additions & 4 deletions src/edu/stanford/nlp/trees/tregex/CoordinationPattern.java
Expand Up @@ -131,22 +131,51 @@ void resetChildIter(Tree tree) {
}
}

@Override
boolean isReset() {
// if we partially or completely went through the node,
// we are obviously not reset
if (currChild != 0) {
return false;
}
// if we're at the start, and the first child is not
// initialized, we haven't done anything yet
if (children[0] == null) {
return true;
}
// otherwise, we may have initialized the child
// on a previous time through, then reset it
return (children[0].isReset());
}

// find the next local match
@Override
public boolean matches() { // also known as "FUN WITH LOGIC"
if (considerAll) {
// these are the cases where all children must be considered to match
if (currChild < 0) {
// a past call to this node either got that it failed
// A past call to this node either got that it failed
// matching or that it was a negative match that succeeded,
// which we only want to accept once
return myNode.isOptional();
// Note that in the case of isOptional nodes, we want to NOT
// match again. The previous time through failed to match,
// but was already returned as true because of isOptional().
// If we match again, it will infinite loop because it
// keeps "succeeding" at the optional match
return false;
}

// we must have happily reached the end of a match the last
// time we were here
// we track pastSuccess so that if we reach a failure in
// an optional node after already succeeding, we don't return
// another success, which would be a spurious extra match
final boolean pastSuccess;
if (currChild == children.length) {
--currChild;
pastSuccess = true;
} else {
pastSuccess = false;
}

while (true) {
Expand All @@ -173,7 +202,7 @@ public boolean matches() { // also known as "FUN WITH LOGIC"
// earlier location.
--currChild;
if (currChild < 0) {
return myNode.isOptional();
return myNode.isOptional() && !pastSuccess;
}
} else {
// oops, this didn't work - negated disjunction version
Expand All @@ -188,6 +217,16 @@ public boolean matches() { // also known as "FUN WITH LOGIC"
}
}
} else {
// Track the first time through this loop
// This will let us handle optional disjunctions
// Note that we can't just check currChild, since it might be 0
// for a match that already hit once on the first child
// We also can't check that children[0] is not created, since
// it might be created and then later reset if this node is
// reached a second time after something higher in the tree
// already matched
final boolean firstTime = isReset();

// these are the cases where a single child node can make you match
for (; currChild < children.length; currChild++) {
if (children[currChild] == null) {
Expand All @@ -214,7 +253,9 @@ public boolean matches() { // also known as "FUN WITH LOGIC"
children[resetChild].resetChildIter();
}
}
return myNode.isOptional();
// only accept an optional disjunction if this is the first time through
// otherwise, we'd be accepting the same disjunction over and over
return firstTime && myNode.isOptional();
}
}

Expand Down
5 changes: 5 additions & 0 deletions src/edu/stanford/nlp/trees/tregex/DescriptionPattern.java
Expand Up @@ -356,6 +356,11 @@ private void resetChild() {
}
}

@Override
boolean isReset() {
return (treeNodeMatchCandidateIterator == null);
}

/* goes to the next node in the tree that is a successful match to my description pattern.
* This is the hotspot method in running tregex, but not clear how to make it faster. */
// when finished = false; break; is called, it means I successfully matched.
Expand Down
10 changes: 10 additions & 0 deletions src/edu/stanford/nlp/trees/tregex/TregexMatcher.java
Expand Up @@ -90,6 +90,16 @@ void resetChildIter(Tree tree) {
*/
abstract void resetChildIter();

/**
* Specifically useful for CoordinationPattern optional disjunctions. <br>
* We want to know if the node has succeeded at least once already,
* in which case we won't succeed in the case of a failure of all
* remaining children. <br>
* Checking this instead of keeping a variable will be slightly faster
* for the standard use case of not using disjunctions
*/
abstract boolean isReset();

/**
* Does the pattern match the tree? It's actually closer to java.util.regex's
* "lookingAt" in that the root of the tree has to match the root of the pattern
Expand Down
83 changes: 83 additions & 0 deletions test/src/edu/stanford/nlp/trees/tregex/TregexTest.java
Expand Up @@ -1571,6 +1571,89 @@ public void testOptionalToString() {
assertEquals("Root (A ?[< B | < C ])", pattern.toString());
}

/**
* Tests the subtree pattern, <code>&lt;...</code>, which checks for
* an exact subtree under our current tree, but test it as an optional relation.
*<br>
* Checks a bug reported by @tanloong (https://github.com/stanfordnlp/CoreNLP/issues/1375)
*<br>
* The optional subtree should only match exactly once,
* but its buggy form was an infinite loop
* (see CoordinationPattern for more notes on why)
*/
public void testOptionalSubtreePattern() {
runTest("A ?<... { B ; C ; D }", "(A (B 1) (C 2) (D 3))", "(A (B 1) (C 2) (D 3))");
}

/**
* The bug reported by @tanloong (https://github.com/stanfordnlp/CoreNLP/issues/1375)
* actually applied to <i>all</i> optional coordination patterns
*<br>
* Here we check that a simpler optional conjunction also fails
*/
public void testOptionalChild() {
runTest("A ?(< B <C)", "(A (B 1) (C 2) (D 3))", "(A (B 1) (C 2) (D 3))");
}

/**
* An optional coordination which doesn't hit should also match exactly once
*/
public void testOptionalChildMiss() {
runTest("A ?(< B < E)", "(A (B 1) (C 2) (D 3))", "(A (B 1) (C 2) (D 3))");
}

/**
* An optional disjunction coordination should match at least once,
* but not match any extra times just because of the optional
*/
public void testOptionalDisjunction() {
// this matches once as an optional, even though none of the children match
runTest("A ?[< E | < F]", "(A (B 1) (C 2) (D 3))", "(A (B 1) (C 2) (D 3))");

// this matches twice
runTest("A ?[< B | < C]", "(A (B 1) (C 2))", "(A (B 1) (C 2))", "(A (B 1) (C 2))");
// this matches once, since the (< E) is useless
runTest("A ?[< B | < E]", "(A (B 1) (C 2) (D 3))", "(A (B 1) (C 2) (D 3))");
// now it will match twice, since the B should match twice
runTest("A ?[< B | < E]", "(A (B 1) (C 2) (B 3))", "(A (B 1) (C 2) (B 3))", "(A (B 1) (C 2) (B 3))");

// check by hand that foo & bar are set as expected for the disjunction matches
// note that the order will be the order of the disjunction then subtrees,
// not sorted by the order of the subtrees
TregexPattern pattern = TregexPattern.compile("A ?[< B=foo | < C=bar]");
Tree tree = treeFromString("(A (B 1) (C 2) (B 3))");
TregexMatcher matcher = pattern.matcher(tree);
assertTrue(matcher.find());
assertEquals("(B 1)", matcher.getNode("foo").toString());
assertNull(matcher.getNode("bar"));

assertTrue(matcher.find());
assertEquals("(B 3)", matcher.getNode("foo").toString());
assertNull(matcher.getNode("bar"));

assertTrue(matcher.find());
assertNull(matcher.getNode("foo"));
assertEquals("(C 2)", matcher.getNode("bar").toString());

assertFalse(matcher.find());

// this example should also work if the same name is used
// for both of the children!
pattern = TregexPattern.compile("A ?[< B=foo | < C=foo]");
matcher = pattern.matcher(tree);
assertTrue(matcher.find());
assertEquals("(B 1)", matcher.getNode("foo").toString());

assertTrue(matcher.find());
assertEquals("(B 3)", matcher.getNode("foo").toString());

assertTrue(matcher.find());
assertEquals("(C 2)", matcher.getNode("foo").toString());

assertFalse(matcher.find());

}

/**
* A user supplied an example of a negated disjunction which went into an infinite loop.
* Apparently no one had ever used a negated disjunction of tree structures before!
Expand Down

0 comments on commit cc7983e

Please sign in to comment.