Add a Java command line tool which converts trees to dependency graphs using protobufs. Included is an update to SemanticGraph.valueOf to set a sentIndex and an option in Tree to yield CoreLabels with the word as the Value instead of the tag
AngledLuffa committed Jan 2, 2023
1 parent 92771b4 commit b118082
Showing 4 changed files with 191 additions and 10 deletions.
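
For orientation, a minimal sketch (not part of the commit) of the conversion the new tool performs for each tree in a request. The demo class name is hypothetical, the bracketed tree is borrowed from the new unit test below, and SemanticGraphFactory.generateUncollapsedDependencies is the same call the converter makes internally.

import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.trees.Tree;

public class TreeToDependenciesDemo {  // hypothetical demo class, not in this commit
  public static void main(String[] args) {
    Tree tree = Tree.valueOf("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ has) (NP (JJ nice) (NNS antennae)))))");
    // same conversion ProcessDependencyConverterRequest applies to each tree in a request
    SemanticGraph graph = SemanticGraphFactory.generateUncollapsedDependencies(tree);
    // prints the compact bracketed form that SemanticGraph.valueOf can parse back
    System.out.println(graph.toCompactString());
  }
}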
30 changes: 24 additions & 6 deletions src/edu/stanford/nlp/semgraph/SemanticGraph.java
@@ -1693,18 +1693,31 @@ public SemanticGraphEdge addEdge(SemanticGraphEdge edge) {
*
* This is the same format generated by toCompactString().
*/
public static SemanticGraph valueOf(String s, Language language, Integer sentIndex) {
return (new SemanticGraphParsingTask(s, language, sentIndex)).parse();
}

/**
* @see SemanticGraph#valueOf(String, Language, Integer)
*/
public static SemanticGraph valueOf(String s, Language language) {
return (new SemanticGraphParsingTask(s, language)).parse();
}

/**
*
* @see SemanticGraph#valueOf(String, Language)
* @see SemanticGraph#valueOf(String, Language, Integer)
*/
public static SemanticGraph valueOf(String s) {
return valueOf(s, Language.UniversalEnglish);
}

/**
* @see SemanticGraph#valueOf(String, Language, Integer)
*/
public static SemanticGraph valueOf(String s, int sentIndex) {
return valueOf(s, Language.UniversalEnglish, sentIndex);
}


public SemanticGraph() {
graph = new DirectedMultiGraph<>(outerMapFactory, innerMapFactory);
@@ -1838,16 +1851,21 @@ private static class SemanticGraphParsingTask extends StringParsingTask<Semantic

private SemanticGraph sg;
private Set<Integer> indexesUsed = Generics.newHashSet();
private Language language;

private final Language language;
private final Integer sentIndex;

public SemanticGraphParsingTask(String s) {
this(s, Language.UniversalEnglish);
this(s, Language.UniversalEnglish, null);
}

public SemanticGraphParsingTask(String s, Language language) {
this(s, language, null);
}

public SemanticGraphParsingTask(String s, Language language, Integer sentIndex) {
super(s);
this.language = language;
this.sentIndex = sentIndex;
}

@Override
Expand Down Expand Up @@ -1909,7 +1927,7 @@ private IndexedWord makeVertex(String word) {
// nothing is actually enforcing that no indexes are used twice. This
// could occur if some words in the string representation being parsed
// come with index markers and some do not.
IndexedWord ifl = new IndexedWord(null, 0, index);
IndexedWord ifl = new IndexedWord(null, sentIndex != null ? sentIndex : 0, index);
// log.info("SemanticGraphParsingTask>>> word = " + word);
// log.info("SemanticGraphParsingTask>>> index = " + index);
// log.info("SemanticGraphParsingTask>>> indexesUsed = " +
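A minimal sketch of the new valueOf overload above, using a hypothetical demo class. The compact string is the one from the new unit test; the loop only shows that every parsed vertex now carries the supplied sentence index instead of the old hard-coded 0.

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;

public class ValueOfSentIndexDemo {  // hypothetical demo class, not in this commit
  public static void main(String[] args) {
    // parse the compact string used by the new unit test, assigning sentence index 1
    SemanticGraph sg = SemanticGraph.valueOf(
        "[has/VBZ-1 nsubj>Jennifer/NNP-0 obj>[antennae/NNS-3 amod>nice/JJ-2]]", 1);
    for (IndexedWord word : sg.vertexSet()) {
      // with the change to makeVertex, each vertex reports sentIndex 1 rather than 0
      System.out.println(word.word() + "-" + word.index() + " sentIndex=" + word.sentIndex());
    }
  }
}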
75 changes: 75 additions & 0 deletions src/edu/stanford/nlp/trees/ProcessDependencyConverterRequest.java
@@ -0,0 +1,75 @@
package edu.stanford.nlp.trees;

/**
* A tool to turn Tree objects into dependencies
*
* Only works for English (at least for now)
*/

import java.io.InputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import java.util.stream.Collectors;

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.ProcessProtobufRequest;

public class ProcessDependencyConverterRequest extends ProcessProtobufRequest {
/**
* Convert a single Tree to basic dependencies
*/
static SemanticGraph convert(Tree tree) {
SemanticGraph uncollapsedDeps = SemanticGraphFactory.generateUncollapsedDependencies(tree);
return uncollapsedDeps;
}

/**
* Process a single request, responding with basic dependencies for each tree
*/
static CoreNLPProtos.DependencyConverterResponse processRequest(CoreNLPProtos.DependencyConverterRequest request) {
ProtobufAnnotationSerializer serializer = new ProtobufAnnotationSerializer();
CoreNLPProtos.DependencyConverterResponse.Builder responseBuilder = CoreNLPProtos.DependencyConverterResponse.newBuilder();

List<CoreNLPProtos.FlattenedParseTree> flattenedTrees = request.getTreesList();
int treeIdx = 0;
for (CoreNLPProtos.FlattenedParseTree flattenedTree : flattenedTrees) {
Tree tree = ProtobufAnnotationSerializer.fromProto(flattenedTree);
SemanticGraph graph = convert(tree);
for (IndexedWord node : graph.vertexSet()) {
node.set(CoreAnnotations.SentenceIndexAnnotation.class, treeIdx);
}
CoreNLPProtos.DependencyConverterResponse.DependencyConversion.Builder conversionBuilder = CoreNLPProtos.DependencyConverterResponse.DependencyConversion.newBuilder();
conversionBuilder.setGraph(ProtobufAnnotationSerializer.toProto(graph));
conversionBuilder.setTree(flattenedTree);
responseBuilder.addConversions(conversionBuilder.build());
++treeIdx;
}
return responseBuilder.build();
}

/**
* Process a single request from a stream, responding with basic dependencies for each tree
*/
@Override
public void processInputStream(InputStream in, OutputStream out) throws IOException {
CoreNLPProtos.DependencyConverterRequest request = CoreNLPProtos.DependencyConverterRequest.parseFrom(in);
CoreNLPProtos.DependencyConverterResponse response = processRequest(request);
response.writeTo(out);
}

/**
* The inherited main program will either convert a single request,
* or will listen to stdin and convert every request that comes in
* until a terminator is sent or the stream closes
*/
public static void main(String[] args) throws IOException {
ProcessProtobufRequest.process(new ProcessDependencyConverterRequest(), args);
}
}
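
A hedged usage sketch for the class above: it drives the public processInputStream method with in-memory streams, mirroring what a client writing protobufs to the tool's stdin would do. The demo class name is hypothetical; the proto builders and the toFlattenedTree call are the ones used in the new unit test.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
import edu.stanford.nlp.trees.ProcessDependencyConverterRequest;
import edu.stanford.nlp.trees.Tree;

public class DependencyConverterStreamDemo {  // hypothetical demo class, not in this commit
  public static void main(String[] args) throws IOException {
    // build a request containing a single flattened tree
    Tree tree = Tree.valueOf("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ has) (NP (JJ nice) (NNS antennae)))))");
    CoreNLPProtos.DependencyConverterRequest request =
        CoreNLPProtos.DependencyConverterRequest.newBuilder()
            .addTrees(ProtobufAnnotationSerializer.toFlattenedTree(tree))
            .build();

    // round-trip the request through the same stream interface the main program exposes
    ByteArrayInputStream in = new ByteArrayInputStream(request.toByteArray());
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    new ProcessDependencyConverterRequest().processInputStream(in, out);

    CoreNLPProtos.DependencyConverterResponse response =
        CoreNLPProtos.DependencyConverterResponse.parseFrom(out.toByteArray());
    // one DependencyConversion (graph + original tree) per input tree
    System.out.println(response.getConversionsList().size() + " conversion(s)");
  }
}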
24 changes: 20 additions & 4 deletions src/edu/stanford/nlp/trees/Tree.java
@@ -1625,12 +1625,24 @@ public List<LabeledWord> labeledYield(List<LabeledWord> ty) {
* @return A tagged, labeled yield.
*/
public List<CoreLabel> taggedLabeledYield() {
return taggedLabeledYield(true);
}


/** Returns a {@code List<CoreLabel>} from the tree.
* These are a copy of the complete token representation
* along with the tag.
*
* @param tagValues use the tags for the values (otherwise use the leaf)
* @return A tagged, labeled yield.
*/
public List<CoreLabel> taggedLabeledYield(boolean tagValues) {
List<CoreLabel> ty = new ArrayList<>();
taggedLabeledYield(ty, 0);
taggedLabeledYield(ty, 0, tagValues);
return ty;
}

private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) {
private int taggedLabeledYield(List<CoreLabel> ty, int termIdx, boolean tagValues) {
if (isPreTerminal()) {
// usually this will fill in all the usual keys for a token
CoreLabel taggedWord = new CoreLabel(firstChild().label());
@@ -1640,7 +1652,11 @@ private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) {
}
final String tag = (value() == null) ? "" : value();
// set value and tag to the tag
taggedWord.setValue(tag);
if (tagValues) {
taggedWord.setValue(tag);
} else {
taggedWord.setValue(taggedWord.word());
}
taggedWord.setTag(tag);
taggedWord.setIndex(termIdx);
ty.add(taggedWord);
@@ -1649,7 +1665,7 @@ private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) {

} else {
for (Tree kid : getChildrenAsList())
termIdx = kid.taggedLabeledYield(ty, termIdx);
termIdx = kid.taggedLabeledYield(ty, termIdx, tagValues);
}

return termIdx;
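A minimal sketch of the new taggedLabeledYield(boolean) option above, with a hypothetical demo class. The tree string is reused from the new unit test, which relies on this option to recover the words when deserializing the returned graphs.

import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.Tree;

public class TaggedLabeledYieldDemo {  // hypothetical demo class, not in this commit
  public static void main(String[] args) {
    Tree tree = Tree.valueOf("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ has) (NP (JJ nice) (NNS antennae)))))");
    List<CoreLabel> tagValues = tree.taggedLabeledYield();       // default: value and tag are both the POS tag
    List<CoreLabel> wordValues = tree.taggedLabeledYield(false); // new: keep the leaf word as the value
    System.out.println(tagValues.get(0).value());   // NNP
    System.out.println(wordValues.get(0).value());  // Jennifer
    System.out.println(wordValues.get(0).tag());    // NNP
  }
}
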
72 changes: 72 additions & 0 deletions test/src/edu/stanford/nlp/trees/ProcessDependencyConverterRequestTest.java
@@ -0,0 +1,72 @@
package edu.stanford.nlp.trees;

import java.util.List;
import java.util.stream.Collectors;

import org.junit.Assert;
import org.junit.Test;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.trees.Tree;

public class ProcessDependencyConverterRequestTest {

static CoreNLPProtos.DependencyConverterRequest buildRequest(String ... trees) {
CoreNLPProtos.DependencyConverterRequest.Builder builder = CoreNLPProtos.DependencyConverterRequest.newBuilder();

for (String tree : trees) {
Tree t = Tree.valueOf(tree);
builder.addTrees(ProtobufAnnotationSerializer.toFlattenedTree(t));
}

return builder.build();
}

static void checkResults(CoreNLPProtos.DependencyConverterResponse response, String ... expectedResults) {
Assert.assertEquals(expectedResults.length, response.getConversionsList().size());
for (int i = 0; i < expectedResults.length; ++i) {
CoreNLPProtos.DependencyGraph responseGraph = response.getConversionsList().get(i).getGraph();
CoreNLPProtos.FlattenedParseTree responseTree = response.getConversionsList().get(i).getTree();
Tree tree = ProtobufAnnotationSerializer.fromProto(responseTree);
List<CoreLabel> sentence = tree.taggedLabeledYield(false);

SemanticGraph expected = SemanticGraph.valueOf(expectedResults[i], i);
SemanticGraph graph = ProtobufAnnotationSerializer.fromProto(responseGraph, sentence, null);
//for (IndexedWord word : expected.vertexSet()) {
// System.out.println(word + " " + word.index() + " " + word.sentIndex() + " " + word.docID());
//}
//for (IndexedWord word : graph.vertexSet()) {
// System.out.println(word + " " + word.index() + " " + word.sentIndex() + " " + word.docID());
//}
//System.out.println(expected.toCompactString());
//System.out.println(graph.toCompactString());
Assert.assertEquals(expected, graph);
}
}

/** Test a single Tree turning into Dependencies */
@Test
public void testOneTree() {
CoreNLPProtos.DependencyConverterRequest request = buildRequest("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ has) (NP (JJ nice) (NNS antennae)))))");
CoreNLPProtos.DependencyConverterResponse response = ProcessDependencyConverterRequest.processRequest(request);
checkResults(response, "[has/VBZ-1 nsubj>Jennifer/NNP-0 obj>[antennae/NNS-3 amod>nice/JJ-2]]");
}

/** Test two trees turning into Dependencies */
@Test
public void testTwoTrees() {
CoreNLPProtos.DependencyConverterRequest request = buildRequest("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ has) (NP (JJ nice) (NNS antennae)))))",
"(ROOT (S (NP (PRP She)) (VP (VBZ is) (ADJP (RB hella) (JJ basic)) (ADVP (RB though)))))");
CoreNLPProtos.DependencyConverterResponse response = ProcessDependencyConverterRequest.processRequest(request);
checkResults(response,
"[has/VBZ-1 nsubj>Jennifer/NNP-0 obj>[antennae/NNS-3 amod>nice/JJ-2]]",
"[basic/JJ-3 nsubj>She/PRP-0 cop>is/VBZ-1 advmod>hella/RB-2 advmod>though/RB-4]");
}

}

