Skip to content

Commit

Permalink
Hopefully fix an issue in OpenIE where graphs can be disconnected aft…
Browse files Browse the repository at this point in the history
…er removing subsets of the extra edges. Includes a more detailed exception for when problem cases come up.
  • Loading branch information
AngledLuffa committed Dec 14, 2021
1 parent be7f056 commit d3a890f
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,8 @@ protected ClauseSplitterSearchProblem(SemanticGraph tree, boolean assumedTruth,
extraEdgesByGovernor.put(vertex, new ArrayList<>());
extraEdgesByDependent.put(vertex, new ArrayList<>());
}
List<SemanticGraphEdge> extraEdges = Util.cleanTree(this.tree);
SemanticGraph originalTree = new SemanticGraph(this.tree);
List<SemanticGraphEdge> extraEdges = Util.cleanTree(this.tree, originalTree);
assert Util.isTree(this.tree);
for (SemanticGraphEdge edge : extraEdges) {
extraEdgesByGovernor.get(edge.getGovernor()).add(edge);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ private List<SearchResult> searchImplementation() {
}
}
// Clean the tree
Util.cleanTree(parseTree);
Util.cleanTree(parseTree, this.parseTree);
assert Util.isTree(parseTree);

// Find the subject / object split
Expand Down
9 changes: 7 additions & 2 deletions src/edu/stanford/nlp/naturalli/NotTreeException.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@
import edu.stanford.nlp.semgraph.SemanticGraphEdge;

public class NotTreeException extends RuntimeException {
public NotTreeException(SemanticGraph graph, SemanticGraphEdge edge) {
super("The graph \n" + graph + "\nis not a tree after removing\n" + edge);
SemanticGraph brokenGraph;
SemanticGraph originalGraph;

public NotTreeException(SemanticGraph graph, SemanticGraph originalGraph) {
super("The graph \n" + graph + "\nis not a tree after its surgery. Original graph:\n" + originalGraph);
this.brokenGraph = graph;
this.originalGraph = originalGraph;
}
}
12 changes: 6 additions & 6 deletions src/edu/stanford/nlp/naturalli/OpenIE.java
Original file line number Diff line number Diff line change
Expand Up @@ -462,16 +462,16 @@ public void annotateSentence(CoreMap sentence, Map<CoreLabel, List<CoreLabel>> c
} else {

// Get the dependency tree
SemanticGraph parse = sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
if (parse == null) {
parse = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
SemanticGraph originalParse = sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
if (originalParse == null) {
originalParse = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
}
if (parse == null) {
if (originalParse == null) {
throw new IllegalStateException("Cannot run OpenIE without a parse tree!");
}
// Clean the tree
parse = new SemanticGraph(parse);
Util.cleanTree(parse);
SemanticGraph parse = new SemanticGraph(originalParse);
Util.cleanTree(parse, originalParse);

// Resolve Coreference
SemanticGraph canonicalizedParse = parse;
Expand Down
109 changes: 83 additions & 26 deletions src/edu/stanford/nlp/naturalli/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,29 @@ public static void annotate(CoreMap sentence, AnnotationPipeline pipeline) {
pipeline.annotate(ann);
}

/**
* Verify that when removing edges from the graph, the graph is not disconnected after the removal.
*<br>
* The edge specified in toKeep will be kept.
*/
public static boolean verifyRemoval(SemanticGraph tree, Collection<SemanticGraphEdge> removeEdges, SemanticGraphEdge toKeep, IndexedWord dependent) {
SemanticGraph copy = tree.makeSoftCopy();
IndexedWord root = copy.getFirstRoot();

for (SemanticGraphEdge remove : removeEdges) {
if (remove == toKeep) {
continue;
}
boolean removed = copy.removeEdge(remove);
assert removed;
}
if (copy.getShortestDirectedPathNodes(root, dependent) == null) {
return false;
}
return true;
}


/**
* Fix some bizarre peculiarities with certain trees.
* So far, these include:
Expand All @@ -152,8 +175,8 @@ public static void annotate(CoreMap sentence, AnnotationPipeline pipeline) {
* @param tree The tree to clean (in place!).
* @return A list of extra edges, which are valid but were removed.
*/
public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) {
// assert !isCyclic(tree);
public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree, SemanticGraph originalTree) {
// assert !isCyclic(tree);

// Clean nodes
List<IndexedWord> toDelete = new ArrayList<>();
Expand Down Expand Up @@ -202,37 +225,69 @@ public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) {
toDelete.forEach(tree::removeVertex);
for (Triple<IndexedWord, IndexedWord, SemanticGraphEdge> edge : toAdd) {
tree.addEdge(edge.first, edge.second,
edge.third.getRelation(), edge.third.getWeight(), edge.third.isExtra());
edge.third.getRelation(), edge.third.getWeight(), edge.third.isExtra());
}

// Handle extra edges.
// Two cases:
// (1) the extra edge is a subj/obj edge and the main edge is a conj:.*
// in this case, keep the extra
// (2) otherwise, delete the extra
// exception: the case when removing the conj is removing the
// path from the root to this node
// (2) otherwise, delete the extra(s)
List<SemanticGraphEdge> extraEdges = new ArrayList<>();
for (SemanticGraphEdge edge : tree.edgeIterable()) {
if (edge.isExtra()) {
List<SemanticGraphEdge> incomingEdges = tree.incomingEdgeList(edge.getDependent());
SemanticGraphEdge toKeep = null;
for (SemanticGraphEdge candidate : incomingEdges) {
if (toKeep == null) {
toKeep = candidate;
} else if (toKeep.getRelation().toString().startsWith("conj") && candidate.getRelation().toString().matches(".subj.*|.obj.*")) {
toKeep = candidate;
} else if (!candidate.isExtra() &&
!(candidate.getRelation().toString().startsWith("conj") && toKeep.getRelation().toString().matches(".subj.*|.obj.*"))) {
toKeep = candidate;
for (IndexedWord vertex : tree.vertexSet()) {
// can't prune any edges if there is only one incoming edge.
// that would ruin the graph
// conversely, if there is more than one edge, at least one of
// them should be extra
if (tree.inDegree(vertex) <= 1) {
continue;
}

List<SemanticGraphEdge> incomingEdges = tree.incomingEdgeList(vertex);
SemanticGraphEdge toKeep = null;
SemanticGraphEdge original = null;
for (SemanticGraphEdge candidate : incomingEdges) {
if (!candidate.isExtra()) {
if (original != null) {
// TODO(horatio): This would be an ideal place to throw an
// error, but unfortunately even the basic graph can have
// errors with multiple incoming edges as currently constructed.
//throw new IllegalArgumentException("Somehow, the original edge returned\n Original graph:\n" + originalTree +
// "\n Current graph:\n" + tree +
// "\n First edge found: " + original +
// "\n Next edge found: " + candidate);
} else {
// either or both could be wrong, so we don't try to
// figure out which to keep
original = candidate;
}
}
for (SemanticGraphEdge candidate : incomingEdges) {
if (candidate != toKeep) {
extraEdges.add(candidate);
}
if (toKeep == null) {
toKeep = candidate;
} else if (toKeep.getRelation().toString().startsWith("conj") && candidate.getRelation().toString().matches(".subj.*|.obj.*")) {
toKeep = candidate;
} else if (!candidate.isExtra() &&
!(candidate.getRelation().toString().startsWith("conj") && toKeep.getRelation().toString().matches(".subj.*|.obj.*"))) {
toKeep = candidate;
}
}
if (!verifyRemoval(tree, incomingEdges, toKeep, toKeep.getDependent())) {
if (original == null) {
throw new RuntimeException("Could not find a valid edge to remove");
}
toKeep = original;
}
List<SemanticGraphEdge> removeEdges = new ArrayList<>();
for (SemanticGraphEdge candidate : incomingEdges) {
if (candidate != toKeep) {
removeEdges.add(candidate);
}
}
removeEdges.forEach(tree::removeEdge);
extraEdges.addAll(removeEdges);
}
extraEdges.forEach(tree::removeEdge);

// Add apposition edges (simple coref)
for (SemanticGraphEdge extraEdge : new ArrayList<>(extraEdges)) { // note[gabor] prevent concurrent modification exception
Expand Down Expand Up @@ -321,7 +376,9 @@ public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) {
}

// Return
assert isTree(tree);
if (!isTree(tree)) {
throw new NotTreeException(tree, originalTree);
}
return extraEdges;
}

Expand All @@ -331,7 +388,7 @@ public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) {
* This replicates the behavior of the old Stanford dependencies on universal dependencies.
* @param tree The tree to modify in place.
*/
public static void stripPrepCases(SemanticGraph tree) {
public static void stripPrepCases(SemanticGraph tree, SemanticGraph originalTree) {
// Find incoming case edges that have an 'nmod' incoming edge
List<SemanticGraphEdge> toClean = new ArrayList<>();
for (SemanticGraphEdge edge : tree.edgeIterable()) {
Expand All @@ -353,9 +410,9 @@ public static void stripPrepCases(SemanticGraph tree) {
for (SemanticGraphEdge edge : toClean) {
tree.removeEdge(edge);
tree.removeVertex(edge.getDependent());
if (!isTree(tree)) {
throw new NotTreeException(tree, edge);
}
}
if (!isTree(tree)) {
throw new NotTreeException(tree, originalTree);
}
}

Expand Down

0 comments on commit d3a890f

Please sign in to comment.