diff --git a/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/DeleteLeaf.java b/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/DeleteLeaf.java
new file mode 100644
index 0000000000..62f960124a
--- /dev/null
+++ b/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/DeleteLeaf.java
@@ -0,0 +1,80 @@
+package edu.stanford.nlp.semgraph.semgrex.ssurgeon;
+
+import java.io.StringWriter;
+
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.semgraph.SemanticGraphEdge;
+
+/**
+ * This action deletes the named node from the graph, but only if the node
+ * is a leaf, meaning it has no outgoing edges.  The node's incoming edges
+ * are removed along with it, and the nodes at later indices are renumbered
+ * so that the deleted node does not leave a gap.
+ *
+ * @author lumberjack
+ */
+public class DeleteLeaf extends SsurgeonEdit {
+  /** Command name which selects this edit when parsing an edit line. */
+  public static final String LABEL = "deleteLeaf";
+  protected String nodeName; // name of this node
+
+  /**
+   * @param nodeName name of the matched node which should be deleted
+   */
+  public DeleteLeaf(String nodeName) {
+    this.nodeName = nodeName;
+  }
+
+  /**
+   * Deletes the named node if it was matched and is a leaf.
+   * If executed twice on the same node, the second time there
+   * will be no further updates
+   *
+   * @param sg the graph to edit
+   * @param sm the matcher in which the named node is looked up
+   * @return true if an edge or the node itself was removed, false if the
+   *         node was not found or still has outgoing edges
+   */
+  @Override
+  public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
+    IndexedWord tgtNode = getNamedNode(nodeName, sm);
+    if (tgtNode == null) {
+      return false;
+    }
+    // if there are any outgoing edges, we aren't a leaf
+    if (!sg.outgoingEdgeList(tgtNode).isEmpty()) {
+      return false;
+    }
+    boolean deletedEdge = false;
+    // use incomingEdgeList so that deleting an edge
+    // doesn't affect the iteration
+    for (SemanticGraphEdge edge : sg.incomingEdgeList(tgtNode)) {
+      // removeEdge must be the left operand: with the flag on the left,
+      // || would skip the removal once one edge had been deleted
+      deletedEdge = sg.removeEdge(edge) || deletedEdge;
+    }
+    int deletedIndex = tgtNode.index();
+    boolean deletedNode = sg.removeVertex(tgtNode);
+    if (deletedNode) {
+      // shift the indices at or above the deleted one down by one
+      AddDep.moveNodes(sg, sm, x -> (x >= deletedIndex), x -> x-1, false);
+    }
+    return deletedEdge || deletedNode;
+  }
+
+  /**
+   * @return the tab separated edit line for this edit: the label,
+   *         {@code Ssurgeon.NODENAME_ARG}, and the node name
+   */
+  @Override
+  public String toEditString() {
+    StringWriter buf = new StringWriter();
+    buf.write(LABEL); buf.write("\t");
+    buf.write(Ssurgeon.NODENAME_ARG); buf.write("\t");
+    buf.write(nodeName);
+    return buf.toString();
+  }
+
+}
diff --git a/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java b/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java
index f3be7dd792..5651642cfa 100644
--- a/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java
+++ b/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java
@@ -87,6 +87,7 @@
  *
  • {@code mergeNodes n1 n2} *
  • {@code killAllIncomingEdges -node node} *
  • {@code delete -node node} + *
  • {@code deleteLeaf -node node} *
  • {@code killNonRootedNodes} * * @@ -154,6 +155,10 @@ * {@code -node} is the node to delete. * You will only want to do this after separating the node from the parts of the graph you want to keep. *

    + * {@code deleteLeaf} deletes a node as long as it is a leaf (a node with no outgoing edges). + * {@code -node} is the node to delete. + * If the node is not a leaf, it will not be deleted. + *

    * {@code killNonRootedNodes} searches the graph and deletes all nodes which have no path to a root. *

    *

    @@ -544,6 +549,11 @@ public static SsurgeonEdit parseEditLine(String editLine, Map at
           throw new SsurgeonParseException("Cannot make a DeleteGraphFromNode out of " + argsBox.nodes.size() + " nodes");
         }
         return new DeleteGraphFromNode(argsBox.nodes.get(0));
+      } else if (command.equalsIgnoreCase(DeleteLeaf.LABEL)) {
+        if (argsBox.nodes.size() != 1) {
+          throw new SsurgeonParseException("Cannot make a DeleteLeaf out of " + argsBox.nodes.size() + " nodes");
+        }
+        return new DeleteLeaf(argsBox.nodes.get(0));
       } else if (command.equalsIgnoreCase(EditNode.LABEL)) {
         if (argsBox.nodes.size() != 1) {
           throw new SsurgeonParseException("Cannot make an EditNode out of " + argsBox.nodes.size() + " nodes");
diff --git a/test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java b/test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java
index 8de368441f..6f788bd682 100644
--- a/test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java
+++ b/test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java
@@ -1721,6 +1721,44 @@ public void readXMLOneStepReattach() {
     assertEquals(newSg, expected);
   }
 
+
+  /**
+   * Test deleteLeaf, which removes an unwanted leaf and its edges, then renumbers everything
+   * <br>
+   * Uses a real example from UD_Portuguese-GSD
+   */
+  @Test
+  public void readXMLDeleteLeaf() {
+    String doc = String.join(newline,
+                             "<ssurgeon-pattern-list>",
+                             "  <ssurgeon-pattern>",
+                             "    <uid>38</uid>",
+                             "    <notes>Test deleting a leaf (only if it's a leaf)</notes>",
+                             "    <language>UniversalEnglish</language>",
+                             // the real life example used POS tags to make sure "verb" and "clitic" are the right pieces
+                             "    <semgrex>" + XMLUtils.escapeXML("{}=verb . ({word:/-/}=dash . {word:se}=clitic)") + "</semgrex>",
+                             "    <edit-list>combineMWT -node verb -node dash -node clitic</edit-list>",
+                             "    <edit-list>deleteLeaf -node dash</edit-list>",
+                             "  </ssurgeon-pattern>",
+                             "</ssurgeon-pattern-list>");
+    Ssurgeon inst = Ssurgeon.inst();
+    List<SsurgeonPattern> patterns = inst.readFromString(doc);
+    assertEquals(patterns.size(), 1);
+    SsurgeonPattern pattern = patterns.get(0);
+
+    // the dash should be removed and all words with an index after the dash should have that index decremented
+    SemanticGraph sg = SemanticGraph.valueOf("[nobre-6 nmod> [decreto-9 case> com-7 det> o-8] cop> fez-3 punct> --4 expl:pv> [se-5 advmod> [Assim punct> ,-2]]]");
+    SemanticGraph expected = SemanticGraph.valueOf("[nobre-5 nmod> [decreto-8 case> com-6 det> o-7] cop> fez-3 expl:pv> [se-4 advmod> [Assim punct> ,-2]]]");
+    SemanticGraph newSg = pattern.iterate(sg).first;
+    assertEquals(newSg, expected);
+
+    // here, the dash isn't a leaf any more, so it shouldn't be deleted
+    sg = SemanticGraph.valueOf("[nobre-6 nmod> [decreto-9 case> com-7 det> o-8] cop> fez-3 punct> [--4 expl:pv> [se-5 advmod> [Assim punct> ,-2]]]]");
+    expected = SemanticGraph.valueOf("[nobre-6 nmod> [decreto-9 case> com-7 det> o-8] cop> fez-3 punct> [--4 expl:pv> [se-5 advmod> [Assim punct> ,-2]]]]");
+    newSg = pattern.iterate(sg).first;
+    assertEquals(newSg, expected);
+  }
+
   /**
    * Simple test of an Ssurgeon edit script.  This instances a simple semantic graph,
    * a semgrex pattern, and then the resulting actions over the named nodes in the