From f99b5ab87f073118a971c4d1e39df85ab9abbab1 Mon Sep 17 00:00:00 2001 From: John Bauer Date: Fri, 12 Aug 2022 13:47:50 -0700 Subject: [PATCH] Remove an optimization in semgrex which may be causing crashes The optimization removed - if it even does provide a noticeable time benefit - can have repeated IdentityHashMap hashCodes for SemanticGraphs over a long run, leading to crashes when a stale object is retrieved for a new graph. The directed graph now throws a specific exception type when a cyclic graph occurs. Catch exactly that exception in SemgrexMatcher (previous versions caught Exception, which is horrifying) --- .../nlp/graph/CyclicGraphException.java | 15 ++++ .../nlp/graph/DirectedMultiGraph.java | 4 +- .../stanford/nlp/semgraph/SemanticGraph.java | 2 +- .../nlp/semgraph/semgrex/SemgrexMatcher.java | 72 ++++++------------- 4 files changed, 39 insertions(+), 54 deletions(-) create mode 100644 src/edu/stanford/nlp/graph/CyclicGraphException.java diff --git a/src/edu/stanford/nlp/graph/CyclicGraphException.java b/src/edu/stanford/nlp/graph/CyclicGraphException.java new file mode 100644 index 0000000000..c5fd3be060 --- /dev/null +++ b/src/edu/stanford/nlp/graph/CyclicGraphException.java @@ -0,0 +1,15 @@ +package edu.stanford.nlp.graph; + +public class CyclicGraphException extends IllegalStateException { + public final String error; + public final DirectedMultiGraph graph; + + public CyclicGraphException(String error, DirectedMultiGraph graph) { + this.error = error; + this.graph = graph; + } + + public String toString() { + return super.toString() + ": " + error + "\nGraph:\n" + graph; + } +} diff --git a/src/edu/stanford/nlp/graph/DirectedMultiGraph.java b/src/edu/stanford/nlp/graph/DirectedMultiGraph.java index c3e7e5598c..68dd9255cc 100644 --- a/src/edu/stanford/nlp/graph/DirectedMultiGraph.java +++ b/src/edu/stanford/nlp/graph/DirectedMultiGraph.java @@ -613,7 +613,7 @@ public void remove() { * Topological sorting only works if the graph is acyclic. * * @return A sorted list of the vertices - * @throws IllegalStateException if this graph is not a DAG + * @throws CyclicGraphException (a subtype of IllegalStateException) if this graph is not a DAG */ public List topologicalSort() { List result = Generics.newArrayList(); @@ -637,7 +637,7 @@ private void topologicalSortHelper(V vertex, Set temporary, Set permanent, continue; } if (temporary.contains(neighbor)) { - throw new IllegalStateException("This graph has cycles. Topological sort not possible: " + this.toString()); + throw new CyclicGraphException("This graph has cycles. Topological sort not possible", this); } topologicalSortHelper(neighbor, temporary, permanent, result); } diff --git a/src/edu/stanford/nlp/semgraph/SemanticGraph.java b/src/edu/stanford/nlp/semgraph/SemanticGraph.java index 668a864727..ddc057f4c6 100644 --- a/src/edu/stanford/nlp/semgraph/SemanticGraph.java +++ b/src/edu/stanford/nlp/semgraph/SemanticGraph.java @@ -899,7 +899,7 @@ public void setRoots(Collection words) { /** * * @return A sorted list of the vertices - * @throws IllegalStateException if this graph is not a DAG + * @throws CyclicGraphException (a subtype of IllegalStateException) if this graph is not a DAG */ public List topologicalSort() { return graph.topologicalSort(); diff --git a/src/edu/stanford/nlp/semgraph/semgrex/SemgrexMatcher.java b/src/edu/stanford/nlp/semgraph/semgrex/SemgrexMatcher.java index e796a858a9..d625dc9e6e 100644 --- a/src/edu/stanford/nlp/semgraph/semgrex/SemgrexMatcher.java +++ b/src/edu/stanford/nlp/semgraph/semgrex/SemgrexMatcher.java @@ -1,5 +1,6 @@ package edu.stanford.nlp.semgraph.semgrex; +import edu.stanford.nlp.graph.CyclicGraphException; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.ling.*; import edu.stanford.nlp.util.logging.Redwood; @@ -117,67 +118,36 @@ public boolean matchesAt(IndexedWord node) { /** - * Topological sorting actually takes a rather large amount of time, if you call multiple - * patterns on the same tree. - * This is a weak cache that stores all the trees sorted since the garbage collector last kicked in. - * The key on this map is the identity hash code (i.e., memory address) of the semantic graph; the - * value is the sorted list of vertices. - *

- * Note that this optimization will cause strange things to happen if you mutate a semantic graph between - * calls to Semgrex. + * Find the next match of the pattern in the graph. + * + * @return whether there is a match somewhere in the graph */ - private static final WeakHashMap> topologicalSortCache = new WeakHashMap<>(); - - private void setupFindIterator() { - try { + public boolean find() { + // log.info("hyp: " + hyp); + // there was a cache of the topological sorts to reuse across + // SemgrexPatterns which used IdentityHashMap to remember + // SemanticGraphs, but it was apparently the cause of various + // thread safety bugs when the results were used for an old + // SemanticGraph + if (findIterator == null) { if (hyp) { - synchronized (topologicalSortCache) { - List topoSort = topologicalSortCache.get(System.identityHashCode(sg)); - if (topoSort == null || topoSort.size() != sg.size()) { // size check to mitigate a stale cache - topoSort = sg.topologicalSort(); - topologicalSortCache.put(System.identityHashCode(sg), topoSort); - } - findIterator = topoSort.iterator(); + try { + findIterator = sg.topologicalSort().iterator(); + } catch (CyclicGraphException e) { + findIterator = sg.vertexSet().iterator(); } } else if (sg_aligned == null) { - return; + return false; } else { - synchronized (topologicalSortCache) { - List topoSort = topologicalSortCache.get(System.identityHashCode(sg_aligned)); - if (topoSort == null || topoSort.size() != sg_aligned.size()) { // size check to mitigate a stale cache - topoSort = sg_aligned.topologicalSort(); - topologicalSortCache.put(System.identityHashCode(sg_aligned), topoSort); - } - findIterator = topoSort.iterator(); + try { + findIterator = sg_aligned.topologicalSort().iterator(); + } catch (CyclicGraphException e) { + findIterator = sg_aligned.vertexSet().iterator(); } } - } catch (Exception ex) { - if (hyp) { - findIterator = sg.vertexSet().iterator(); - } else if (sg_aligned == null) { - return; - } else { - findIterator = sg_aligned.vertexSet().iterator(); - } } - } - /** - * Find the next match of the pattern in the graph. - * - * @return whether there is a match somewhere in the graph - */ - public boolean find() { - // log.info("hyp: " + hyp); - if (findIterator == null) { - setupFindIterator(); - } - if (findIterator == null) { - return false; - } - // System.out.println("first"); if (findCurrent != null && matches()) { - // log.info("find first: " + findCurrent.word()); return true; } //log.info("here");