Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a Java command line tool which converts trees to dependency graphs using protobufs.
Included is an update to SemanticGraph.valueOf to set a sentIndex and an option in Tree to yield CoreLabels with the word as the Value instead of the tag.
- Loading branch information
1 parent
92771b4
commit b118082
Showing 4 changed files with 191 additions and 10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
75 changes: 75 additions & 0 deletions
75
src/edu/stanford/nlp/trees/ProcessDependencyConverterRequest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
package edu.stanford.nlp.trees; | ||
|
||
/** | ||
* A tool to turn Tree objects into dependencies | ||
* | ||
* Only works for English (at least for now) | ||
*/ | ||
|
||
import java.io.InputStream; | ||
import java.io.IOException; | ||
import java.io.OutputStream; | ||
import java.util.List; | ||
import java.util.stream.Collectors; | ||
|
||
import edu.stanford.nlp.ling.IndexedWord; | ||
import edu.stanford.nlp.ling.CoreAnnotations; | ||
import edu.stanford.nlp.pipeline.CoreNLPProtos; | ||
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer; | ||
import edu.stanford.nlp.semgraph.SemanticGraph; | ||
import edu.stanford.nlp.semgraph.SemanticGraphFactory; | ||
import edu.stanford.nlp.trees.Tree; | ||
import edu.stanford.nlp.util.ProcessProtobufRequest; | ||
|
||
public class ProcessDependencyConverterRequest extends ProcessProtobufRequest { | ||
/** | ||
* Convert a single Tree to basic dependencies | ||
*/ | ||
static SemanticGraph convert(Tree tree) { | ||
SemanticGraph uncollapsedDeps = SemanticGraphFactory.generateUncollapsedDependencies(tree); | ||
return uncollapsedDeps; | ||
} | ||
|
||
/** | ||
* Process a single request, responding with basic dependencies for each tree | ||
*/ | ||
static CoreNLPProtos.DependencyConverterResponse processRequest(CoreNLPProtos.DependencyConverterRequest request) { | ||
ProtobufAnnotationSerializer serializer = new ProtobufAnnotationSerializer(); | ||
CoreNLPProtos.DependencyConverterResponse.Builder responseBuilder = CoreNLPProtos.DependencyConverterResponse.newBuilder(); | ||
|
||
List<CoreNLPProtos.FlattenedParseTree> flattenedTrees = request.getTreesList(); | ||
int treeIdx = 0; | ||
for (CoreNLPProtos.FlattenedParseTree flattenedTree : flattenedTrees) { | ||
Tree tree = ProtobufAnnotationSerializer.fromProto(flattenedTree); | ||
SemanticGraph graph = convert(tree); | ||
for (IndexedWord node : graph.vertexSet()) { | ||
node.set(CoreAnnotations.SentenceIndexAnnotation.class, treeIdx); | ||
} | ||
CoreNLPProtos.DependencyConverterResponse.DependencyConversion.Builder conversionBuilder = CoreNLPProtos.DependencyConverterResponse.DependencyConversion.newBuilder(); | ||
conversionBuilder.setGraph(ProtobufAnnotationSerializer.toProto(graph)); | ||
conversionBuilder.setTree(flattenedTree); | ||
responseBuilder.addConversions(conversionBuilder.build()); | ||
++treeIdx; | ||
} | ||
return responseBuilder.build(); | ||
} | ||
|
||
/** | ||
* Process a single request from a stream, responding with basic dependencies for each tree | ||
*/ | ||
@Override | ||
public void processInputStream(InputStream in, OutputStream out) throws IOException { | ||
CoreNLPProtos.DependencyConverterRequest request = CoreNLPProtos.DependencyConverterRequest.parseFrom(in); | ||
CoreNLPProtos.DependencyConverterResponse response = processRequest(request); | ||
response.writeTo(out); | ||
} | ||
|
||
/** | ||
* The inherited main program will either enhance a single document, | ||
* or will listen to stdin and enhance every document that comes in | ||
* until a terminator is sent or the stream closes | ||
*/ | ||
public static void main(String[] args) throws IOException { | ||
ProcessProtobufRequest.process(new ProcessDependencyConverterRequest(), args); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
72 changes: 72 additions & 0 deletions
72
test/src/edu/stanford/nlp/trees/ProcessDependencyConverterRequestTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
package edu.stanford.nlp.trees; | ||
|
||
import java.util.List; | ||
import java.util.stream.Collectors; | ||
|
||
import org.junit.Assert; | ||
import org.junit.Test; | ||
|
||
import edu.stanford.nlp.ling.CoreLabel; | ||
import edu.stanford.nlp.ling.IndexedWord; | ||
import edu.stanford.nlp.pipeline.CoreNLPProtos; | ||
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer; | ||
import edu.stanford.nlp.semgraph.SemanticGraph; | ||
import edu.stanford.nlp.trees.Tree; | ||
|
||
public class ProcessDependencyConverterRequestTest { | ||
|
||
static CoreNLPProtos.DependencyConverterRequest buildRequest(String ... trees) { | ||
CoreNLPProtos.DependencyConverterRequest.Builder builder = CoreNLPProtos.DependencyConverterRequest.newBuilder(); | ||
|
||
for (String tree : trees) { | ||
Tree t = Tree.valueOf(tree); | ||
builder.addTrees(ProtobufAnnotationSerializer.toFlattenedTree(t)); | ||
} | ||
|
||
return builder.build(); | ||
} | ||
|
||
static void checkResults(CoreNLPProtos.DependencyConverterResponse response, String ... expectedResults) { | ||
Assert.assertEquals(expectedResults.length, response.getConversionsList().size()); | ||
for (int i = 0; i < expectedResults.length; ++i) { | ||
CoreNLPProtos.DependencyGraph responseGraph = response.getConversionsList().get(i).getGraph(); | ||
CoreNLPProtos.FlattenedParseTree responseTree = response.getConversionsList().get(i).getTree(); | ||
Tree tree = ProtobufAnnotationSerializer.fromProto(responseTree); | ||
List<CoreLabel> sentence = tree.taggedLabeledYield(false); | ||
|
||
SemanticGraph expected = SemanticGraph.valueOf(expectedResults[i], i); | ||
SemanticGraph graph = ProtobufAnnotationSerializer.fromProto(responseGraph, sentence, null); | ||
//for (IndexedWord word : expected.vertexSet()) { | ||
// System.out.println(word + " " + word.index() + " " + word.sentIndex() + " " + word.docID()); | ||
//} | ||
//for (IndexedWord word : graph.vertexSet()) { | ||
// System.out.println(word + " " + word.index() + " " + word.sentIndex() + " " + word.docID()); | ||
//} | ||
//System.out.println(expected.toCompactString()); | ||
//System.out.println(graph.toCompactString()); | ||
Assert.assertEquals(expected, graph); | ||
} | ||
} | ||
|
||
/** Test a single Tree turning into Dependencies */ | ||
@Test | ||
public void testOneTree() { | ||
CoreNLPProtos.DependencyConverterRequest request = buildRequest("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ has) (NP (JJ nice) (NNS antennae)))))"); | ||
CoreNLPProtos.DependencyConverterResponse response = ProcessDependencyConverterRequest.processRequest(request); | ||
checkResults(response, "[has/VBZ-1 nsubj>Jennifer/NNP-0 obj>[antennae/NNS-3 amod>nice/JJ-2]]"); | ||
} | ||
|
||
/** Test two trees turning into Dependencies */ | ||
@Test | ||
public void testTwoTrees() { | ||
CoreNLPProtos.DependencyConverterRequest request = buildRequest("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ has) (NP (JJ nice) (NNS antennae)))))", | ||
"(ROOT (S (NP (PRP She)) (VP (VBZ is) (ADJP (RB hella) (JJ basic)) (ADVP (RB though)))))"); | ||
CoreNLPProtos.DependencyConverterResponse response = ProcessDependencyConverterRequest.processRequest(request); | ||
checkResults(response, | ||
"[has/VBZ-1 nsubj>Jennifer/NNP-0 obj>[antennae/NNS-3 amod>nice/JJ-2]]", | ||
"[basic/JJ-3 nsubj>She/PRP-0 cop>is/VBZ-1 advmod>hella/RB-2 advmod>though/RB-4]"); | ||
} | ||
|
||
} | ||
|
||
|