Skip to content

Commit

Permalink
Remove an optimization in semgrex which may be causing crashes
Browse files Browse the repository at this point in the history
The optimization removed - if it even does provide a noticeable time
benefit - can have repeated IdentityHashMap hashCodes for
SemanticGraphs over a long run, leading to crashes when a stale object
is retrieved for a new graph.

The directed graph now throws a specific exception type when a cyclic
graph occurs.  Catch exactly that exception in SemgrexMatcher
(previous versions caught Exception, which is horrifying)
  • Loading branch information
AngledLuffa committed Aug 29, 2022
1 parent 755edcf commit f99b5ab
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 54 deletions.
15 changes: 15 additions & 0 deletions src/edu/stanford/nlp/graph/CyclicGraphException.java
@@ -0,0 +1,15 @@
package edu.stanford.nlp.graph;

public class CyclicGraphException extends IllegalStateException {
public final String error;
public final DirectedMultiGraph<?, ?> graph;

public CyclicGraphException(String error, DirectedMultiGraph<?, ?> graph) {
this.error = error;
this.graph = graph;
}

public String toString() {
return super.toString() + ": " + error + "\nGraph:\n" + graph;
}
}
4 changes: 2 additions & 2 deletions src/edu/stanford/nlp/graph/DirectedMultiGraph.java
Expand Up @@ -613,7 +613,7 @@ public void remove() {
* Topological sorting only works if the graph is acyclic.
*
* @return A sorted list of the vertices
* @throws IllegalStateException if this graph is not a DAG
* @throws CyclicGraphException (a subtype of IllegalStateException) if this graph is not a DAG
*/
public List<V> topologicalSort() {
List<V> result = Generics.newArrayList();
Expand All @@ -637,7 +637,7 @@ private void topologicalSortHelper(V vertex, Set<V> temporary, Set<V> permanent,
continue;
}
if (temporary.contains(neighbor)) {
throw new IllegalStateException("This graph has cycles. Topological sort not possible: " + this.toString());
throw new CyclicGraphException("This graph has cycles. Topological sort not possible", this);
}
topologicalSortHelper(neighbor, temporary, permanent, result);
}
Expand Down
2 changes: 1 addition & 1 deletion src/edu/stanford/nlp/semgraph/SemanticGraph.java
Expand Up @@ -899,7 +899,7 @@ public void setRoots(Collection<IndexedWord> words) {
/**
*
* @return A sorted list of the vertices
* @throws IllegalStateException if this graph is not a DAG
* @throws CyclicGraphException (a subtype of IllegalStateException) if this graph is not a DAG
*/
public List<IndexedWord> topologicalSort() {
return graph.topologicalSort();
Expand Down
72 changes: 21 additions & 51 deletions src/edu/stanford/nlp/semgraph/semgrex/SemgrexMatcher.java
@@ -1,5 +1,6 @@
package edu.stanford.nlp.semgraph.semgrex;

import edu.stanford.nlp.graph.CyclicGraphException;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.ling.*;
import edu.stanford.nlp.util.logging.Redwood;
Expand Down Expand Up @@ -117,67 +118,36 @@ public boolean matchesAt(IndexedWord node) {


/**
* Topological sorting actually takes a rather large amount of time, if you call multiple
* patterns on the same tree.
* This is a weak cache that stores all the trees sorted since the garbage collector last kicked in.
* The key on this map is the identity hash code (i.e., memory address) of the semantic graph; the
* value is the sorted list of vertices.
* <p>
* Note that this optimization will cause strange things to happen if you mutate a semantic graph between
* calls to Semgrex.
* Find the next match of the pattern in the graph.
*
* @return whether there is a match somewhere in the graph
*/
private static final WeakHashMap<Integer, List<IndexedWord>> topologicalSortCache = new WeakHashMap<>();

private void setupFindIterator() {
try {
public boolean find() {
// log.info("hyp: " + hyp);
// there was a cache of the topological sorts to reuse across
// SemgrexPatterns which used IdentityHashMap to remember
// SemanticGraphs, but it was apparently the cause of various
// thread safety bugs when the results were used for an old
// SemanticGraph
if (findIterator == null) {
if (hyp) {
synchronized (topologicalSortCache) {
List<IndexedWord> topoSort = topologicalSortCache.get(System.identityHashCode(sg));
if (topoSort == null || topoSort.size() != sg.size()) { // size check to mitigate a stale cache
topoSort = sg.topologicalSort();
topologicalSortCache.put(System.identityHashCode(sg), topoSort);
}
findIterator = topoSort.iterator();
try {
findIterator = sg.topologicalSort().iterator();
} catch (CyclicGraphException e) {
findIterator = sg.vertexSet().iterator();
}
} else if (sg_aligned == null) {
return;
return false;
} else {
synchronized (topologicalSortCache) {
List<IndexedWord> topoSort = topologicalSortCache.get(System.identityHashCode(sg_aligned));
if (topoSort == null || topoSort.size() != sg_aligned.size()) { // size check to mitigate a stale cache
topoSort = sg_aligned.topologicalSort();
topologicalSortCache.put(System.identityHashCode(sg_aligned), topoSort);
}
findIterator = topoSort.iterator();
try {
findIterator = sg_aligned.topologicalSort().iterator();
} catch (CyclicGraphException e) {
findIterator = sg_aligned.vertexSet().iterator();
}
}
} catch (Exception ex) {
if (hyp) {
findIterator = sg.vertexSet().iterator();
} else if (sg_aligned == null) {
return;
} else {
findIterator = sg_aligned.vertexSet().iterator();
}
}
}

/**
* Find the next match of the pattern in the graph.
*
* @return whether there is a match somewhere in the graph
*/
public boolean find() {
// log.info("hyp: " + hyp);
if (findIterator == null) {
setupFindIterator();
}
if (findIterator == null) {
return false;
}
// System.out.println("first");
if (findCurrent != null && matches()) {
// log.info("find first: " + findCurrent.word());
return true;
}
//log.info("here");
Expand Down

0 comments on commit f99b5ab

Please sign in to comment.