Skip to content

Commit

Permalink
Add a DeleteLeaf operation to Ssurgeon. Will delete a leaf (node with…
Browse files Browse the repository at this point in the history
… no children) and rearrange all the indices appropriately.
  • Loading branch information
AngledLuffa committed Apr 5, 2023
1 parent 203eb06 commit 429f61a
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 0 deletions.
60 changes: 60 additions & 0 deletions src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/DeleteLeaf.java
@@ -0,0 +1,60 @@
package edu.stanford.nlp.semgraph.semgrex.ssurgeon;

import java.io.StringWriter;

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;

/**
 * This action deletes the given node from the graph, but only if the node
 * is a leaf (a node with no outgoing edges).
 * <br>
 * When the node is deleted, its incoming edges are removed as well, and
 * every index at or after the deleted node's index is shifted down by one
 * via {@code AddDep.moveNodes}.
 *
 * @author lumberjack
 */
public class DeleteLeaf extends SsurgeonEdit {
  public static final String LABEL = "deleteLeaf";
  protected String nodeName; // name of this node

  /**
   * @param nodeName the name of the node to delete, as bound by the semgrex pattern
   */
  public DeleteLeaf(String nodeName) {
    this.nodeName = nodeName;
  }

  /**
   * Deletes the named node if it is matched and has no outgoing edges.
   * <br>
   * If executed twice on the same node, the second time there
   * will be no further updates.
   *
   * @param sg the graph to edit in place
   * @param sm the matcher used to look up the named node
   * @return true if an edge or the node itself was removed; false if the
   *         name is not bound to a node or the node is not a leaf
   */
  @Override
  public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
    IndexedWord tgtNode = getNamedNode(nodeName, sm);
    if (tgtNode == null) {
      return false;
    }
    // if there are any outgoing edges, we aren't a leaf
    if (!sg.outgoingEdgeList(tgtNode).isEmpty()) {
      return false;
    }

    boolean deletedEdge = false;
    // use incomingEdgeList so that deleting an edge
    // doesn't affect the iteration
    for (SemanticGraphEdge edge : sg.incomingEdgeList(tgtNode)) {
      // removeEdge must be the left operand: with the flag on the left,
      // || would short-circuit and skip every removal after the first success
      deletedEdge = sg.removeEdge(edge) || deletedEdge;
    }

    // remember the index before removal so the later indices can be shifted
    int deletedIndex = tgtNode.index();
    boolean deletedNode = sg.removeVertex(tgtNode);
    if (deletedNode) {
      // close the gap left by the deleted node: every index >= deletedIndex moves down by one
      AddDep.moveNodes(sg, sm, x -> (x >= deletedIndex), x -> x-1, false);
    }
    return deletedEdge || deletedNode;
  }

  /**
   * @return the tab-separated edit line: the label, the node argument flag, and the node name
   */
  @Override
  public String toEditString() {
    StringWriter buf = new StringWriter();
    buf.write(LABEL); buf.write("\t");
    buf.write(Ssurgeon.NODENAME_ARG); buf.write("\t"); buf.write(nodeName);
    return buf.toString();
  }

}
10 changes: 10 additions & 0 deletions src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java
Expand Up @@ -87,6 +87,7 @@
* <li> {@code mergeNodes n1 n2}
* <li> {@code killAllIncomingEdges -node node}
* <li> {@code delete -node node}
* <li> {@code deleteLeaf -node node}
* <li> {@code killNonRootedNodes}
* </ul>
*
Expand Down Expand Up @@ -154,6 +155,10 @@
* {@code -node} is the node to delete.
* You will only want to do this after separating the node from the parts of the graph you want to keep.
*</p><p>
* {@code deleteLeaf} deletes a node as long as it is a leaf.
* {@code -node} is the node to delete.
* If the node is not a leaf (that is, it has any outgoing edges), it will not be deleted.
*</p><p>
* {@code killNonRootedNodes} searches the graph and deletes all nodes which have no path to a root.
*</p>
*<p>
Expand Down Expand Up @@ -544,6 +549,11 @@ public static SsurgeonEdit parseEditLine(String editLine, Map<String, String> at
throw new SsurgeonParseException("Cannot make a DeleteGraphFromNode out of " + argsBox.nodes.size() + " nodes");
}
return new DeleteGraphFromNode(argsBox.nodes.get(0));
} else if (command.equalsIgnoreCase(DeleteLeaf.LABEL)) {
if (argsBox.nodes.size() != 1) {
throw new SsurgeonParseException("Cannot make a DeleteLeaf out of " + argsBox.nodes.size() + " nodes");
}
return new DeleteLeaf(argsBox.nodes.get(0));
} else if (command.equalsIgnoreCase(EditNode.LABEL)) {
if (argsBox.nodes.size() != 1) {
throw new SsurgeonParseException("Cannot make an EditNode out of " + argsBox.nodes.size() + " nodes");
Expand Down
Expand Up @@ -1721,6 +1721,44 @@ public void readXMLOneStepReattach() {
assertEquals(newSg, expected);
}


/**
 * Exercises the deleteLeaf edit: a matched leaf is removed together with its
 * edges and the later words are renumbered, whereas a matched node which
 * still has children is left alone.
 *<br>
 * The sentence is a real example from UD_Portuguese-GSD
 */
@Test
public void readXMLDeleteLeaf() {
  // the real life example used POS tags to make sure "verb" and "clitic" are the right pieces
  String semgrex = XMLUtils.escapeXML("{}=verb . ({word:/-/}=dash . {word:se}=clitic)");
  String xml = String.join(newline,
                           "<ssurgeon-pattern-list>",
                           " <ssurgeon-pattern>",
                           " <uid>38</uid>",
                           " <notes>Test deleting a leaf (only if it's a leaf)</notes>",
                           " <language>UniversalEnglish</language>",
                           " <semgrex>" + semgrex + "</semgrex>",
                           " <edit-list>combineMWT -node verb -node dash -node clitic</edit-list>",
                           " <edit-list>deleteLeaf -node dash</edit-list>",
                           " </ssurgeon-pattern>",
                           "</ssurgeon-pattern-list>");
  List<SsurgeonPattern> patterns = Ssurgeon.inst().readFromString(xml);
  assertEquals(patterns.size(), 1);
  SsurgeonPattern pattern = patterns.get(0);

  // case 1: the dash is a leaf, so it is removed and each word
  // after it has its index decremented
  SemanticGraph leafInput = SemanticGraph.valueOf("[nobre-6 nmod> [decreto-9 case> com-7 det> o-8] cop> fez-3 punct> --4 expl:pv> [se-5 advmod> [Assim punct> ,-2]]]");
  SemanticGraph leafExpected = SemanticGraph.valueOf("[nobre-5 nmod> [decreto-8 case> com-6 det> o-7] cop> fez-3 expl:pv> [se-4 advmod> [Assim punct> ,-2]]]");
  SemanticGraph leafResult = pattern.iterate(leafInput).first;
  assertEquals(leafResult, leafExpected);

  // case 2: the dash now has a child, so it is not a leaf and
  // the graph should come back unchanged
  SemanticGraph innerInput = SemanticGraph.valueOf("[nobre-6 nmod> [decreto-9 case> com-7 det> o-8] cop> fez-3 punct> [--4 expl:pv> [se-5 advmod> [Assim punct> ,-2]]]]");
  SemanticGraph innerExpected = SemanticGraph.valueOf("[nobre-6 nmod> [decreto-9 case> com-7 det> o-8] cop> fez-3 punct> [--4 expl:pv> [se-5 advmod> [Assim punct> ,-2]]]]");
  SemanticGraph innerResult = pattern.iterate(innerInput).first;
  assertEquals(innerResult, innerExpected);
}

/**
* Simple test of an Ssurgeon edit script. This instances a simple semantic graph,
* a semgrex pattern, and then the resulting actions over the named nodes in the
Expand Down

0 comments on commit 429f61a

Please sign in to comment.