From b118082c1403c8ad7d3c18fc7a211d24fcccb173 Mon Sep 17 00:00:00 2001
From: John Bauer <horatio@gmail.com>
Date: Mon, 2 Jan 2023 12:08:07 -0800
Subject: [PATCH] Add a Java command line tool which converts trees to
 dependency graphs using protobufs.  Included is an update to
 SemanticGraph.valueOf to set a sentIndex and an option in Tree to yield
 CoreLabels with the word as the Value instead of the tag

---
 .../stanford/nlp/semgraph/SemanticGraph.java  | 30 ++++++--
 .../ProcessDependencyConverterRequest.java    | 75 +++++++++++++++++++
 src/edu/stanford/nlp/trees/Tree.java          | 24 +++++-
 ...ProcessDependencyConverterRequestTest.java | 72 ++++++++++++++++++
 4 files changed, 191 insertions(+), 10 deletions(-)
 create mode 100644 src/edu/stanford/nlp/trees/ProcessDependencyConverterRequest.java
 create mode 100644 test/src/edu/stanford/nlp/trees/ProcessDependencyConverterRequestTest.java
diff --git a/src/edu/stanford/nlp/semgraph/SemanticGraph.java b/src/edu/stanford/nlp/semgraph/SemanticGraph.java
index ddc057f4c6..613e6a6019 100644
--- a/src/edu/stanford/nlp/semgraph/SemanticGraph.java
+++ b/src/edu/stanford/nlp/semgraph/SemanticGraph.java
@@ -1693,18 +1693,31 @@ public SemanticGraphEdge addEdge(SemanticGraphEdge edge) {
    *
    * This is the same format generated by toCompactString().
    */
+  public static SemanticGraph valueOf(String s, Language language, Integer sentIndex) {
+    return new SemanticGraphParsingTask(s, language, sentIndex).parse();
+  }
+
+  /**
+   * @see SemanticGraph#valueOf(String, Language, Integer)
+   */
   public static SemanticGraph valueOf(String s, Language language) {
     return (new SemanticGraphParsingTask(s, language)).parse();
   }
 
   /**
-   *
-   * @see SemanticGraph#valueOf(String, Language)
+   * @see SemanticGraph#valueOf(String, Language, Integer)
    */
   public static SemanticGraph valueOf(String s) {
     return valueOf(s, Language.UniversalEnglish);
   }
 
+  /**
+   * Same as {@link #valueOf(String, Language, Integer)}, using Language.UniversalEnglish.
+   */
+  public static SemanticGraph valueOf(String s, int sentIndex) {
+    return valueOf(s, Language.UniversalEnglish, sentIndex);
+  }
+
 
   public SemanticGraph() {
     graph = new DirectedMultiGraph<>(outerMapFactory, innerMapFactory);
@@ -1838,16 +1851,21 @@ private static class SemanticGraphParsingTask extends StringParsingTask<Semantic
 
     private SemanticGraph sg;
     private Set<Integer> indexesUsed = Generics.newHashSet();
-    private Language language;
-
+    private final Language language;
+    private final Integer sentIndex;
 
     public SemanticGraphParsingTask(String s) {
-      this(s, Language.UniversalEnglish);
+      this(s, Language.UniversalEnglish, null);
     }
 
     public SemanticGraphParsingTask(String s, Language language) {
+      this(s, language, null);
+    }
+
+    public SemanticGraphParsingTask(String s, Language language, Integer sentIndex) {
       super(s);
       this.language = language;
+      this.sentIndex = sentIndex;
     }
 
     @Override
@@ -1909,7 +1927,7 @@ private IndexedWord makeVertex(String word) {
       // nothing is actually enforcing that no indexes are used twice. This
       // could occur if some words in the string representation being parsed
       // come with index markers and some do not.
-      IndexedWord ifl = new IndexedWord(null, 0, index);
+      IndexedWord ifl = new IndexedWord(null, sentIndex != null ? sentIndex : 0, index);
       // log.info("SemanticGraphParsingTask>>> word = " + word);
       // log.info("SemanticGraphParsingTask>>> index = " + index);
       // log.info("SemanticGraphParsingTask>>> indexesUsed = " +
diff --git a/src/edu/stanford/nlp/trees/ProcessDependencyConverterRequest.java b/src/edu/stanford/nlp/trees/ProcessDependencyConverterRequest.java
new file mode 100644
index 0000000000..a6486575df
--- /dev/null
+++ b/src/edu/stanford/nlp/trees/ProcessDependencyConverterRequest.java
@@ -0,0 +1,75 @@
+package edu.stanford.nlp.trees;
+
+/**
+ * A tool to turn Tree objects into dependencies
+ *
+ * Only works for English (at least for now)
+ */
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.pipeline.CoreNLPProtos;
+import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.semgraph.SemanticGraphFactory;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.util.ProcessProtobufRequest;
+
+public class ProcessDependencyConverterRequest extends ProcessProtobufRequest {
+  /**
+   * Convert a single Tree to basic dependencies.
+   * @return the graph from SemanticGraphFactory.generateUncollapsedDependencies
+   */
+  static SemanticGraph convert(Tree tree) {
+    return SemanticGraphFactory.generateUncollapsedDependencies(tree);
+  }
+
+  /**
+   * Process a single request, responding with basic dependencies for each tree.
+   * Each node of a graph gets the 0-based position of its tree as sentence index.
+   */
+  static CoreNLPProtos.DependencyConverterResponse processRequest(CoreNLPProtos.DependencyConverterRequest request) {
+    CoreNLPProtos.DependencyConverterResponse.Builder responseBuilder = CoreNLPProtos.DependencyConverterResponse.newBuilder();
+
+    List<CoreNLPProtos.FlattenedParseTree> flattenedTrees = request.getTreesList();
+    int treeIdx = 0;
+    for (CoreNLPProtos.FlattenedParseTree flattenedTree : flattenedTrees) {
+      Tree tree = ProtobufAnnotationSerializer.fromProto(flattenedTree);
+      SemanticGraph graph = convert(tree);
+      for (IndexedWord node : graph.vertexSet()) {
+        node.set(CoreAnnotations.SentenceIndexAnnotation.class, treeIdx);
+      }
+      CoreNLPProtos.DependencyConverterResponse.DependencyConversion.Builder conversionBuilder = CoreNLPProtos.DependencyConverterResponse.DependencyConversion.newBuilder();
+      conversionBuilder.setGraph(ProtobufAnnotationSerializer.toProto(graph));
+      conversionBuilder.setTree(flattenedTree);
+      responseBuilder.addConversions(conversionBuilder.build());
+      ++treeIdx;
+    }
+    return responseBuilder.build();
+  }
+
+  /**
+   * Process a single request from a stream, responding with basic dependencies for each tree
+   */
+  @Override
+  public void processInputStream(InputStream in, OutputStream out) throws IOException {
+    CoreNLPProtos.DependencyConverterRequest request = CoreNLPProtos.DependencyConverterRequest.parseFrom(in);
+    CoreNLPProtos.DependencyConverterResponse response = processRequest(request);
+    response.writeTo(out);
+  }
+
+  /**
+   * The inherited main program will either convert a single request,
+   * or will listen to stdin and convert every request that comes in
+   * until a terminator is sent or the stream closes
+   */
+  public static void main(String[] args) throws IOException {
+    ProcessProtobufRequest.process(new ProcessDependencyConverterRequest(), args);
+  }
+}
diff --git a/src/edu/stanford/nlp/trees/Tree.java b/src/edu/stanford/nlp/trees/Tree.java
index e79363438d..440e66a17f 100644
--- a/src/edu/stanford/nlp/trees/Tree.java
+++ b/src/edu/stanford/nlp/trees/Tree.java
@@ -1625,12 +1625,24 @@ public List<LabeledWord> labeledYield(List<LabeledWord> ty) {
    *  @return A tagged, labeled yield.
    */
   public List<CoreLabel> taggedLabeledYield() {
+    return taggedLabeledYield(true);
+  }
+
+
+  /** Returns a {@code List<CoreLabel>} from the tree.
+   *  These are a copy of the complete token representation
+   *  along with the tag.
+   *
+   *  @param tagValues if true, each label's value is set to its tag; if false, to its word
+   *  @return A tagged, labeled yield.
+   */
+  public List<CoreLabel> taggedLabeledYield(boolean tagValues) {
     List<CoreLabel> ty = new ArrayList<>();
-    taggedLabeledYield(ty, 0);
+    taggedLabeledYield(ty, 0, tagValues);
     return ty;
   }
 
-  private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) {
+  private int taggedLabeledYield(List<CoreLabel> ty, int termIdx, boolean tagValues) {
     if (isPreTerminal()) {
       // usually this will fill in all the usual keys for a token
       CoreLabel taggedWord = new CoreLabel(firstChild().label());
@@ -1640,7 +1652,11 @@ private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) {
       }
       final String tag = (value() == null) ? "" : value();
       // set value and tag to the tag
-      taggedWord.setValue(tag);
+      if (tagValues) {
+        taggedWord.setValue(tag);
+      } else {
+        taggedWord.setValue(taggedWord.word());
+      }
       taggedWord.setTag(tag);
       taggedWord.setIndex(termIdx);
       ty.add(taggedWord);
@@ -1649,7 +1665,7 @@ private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) {
 
     } else {
       for (Tree kid : getChildrenAsList())
-        termIdx = kid.taggedLabeledYield(ty, termIdx);
+        termIdx = kid.taggedLabeledYield(ty, termIdx, tagValues);
     }
 
     return termIdx;
diff --git a/test/src/edu/stanford/nlp/trees/ProcessDependencyConverterRequestTest.java b/test/src/edu/stanford/nlp/trees/ProcessDependencyConverterRequestTest.java
new file mode 100644
index 0000000000..f341b52423
--- /dev/null
+++ b/test/src/edu/stanford/nlp/trees/ProcessDependencyConverterRequestTest.java
@@ -0,0 +1,72 @@
+package edu.stanford.nlp.trees;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.pipeline.CoreNLPProtos;
+import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.trees.Tree;
+
+public class ProcessDependencyConverterRequestTest {
+
+  /** Build a request holding one flattened tree per bracketed tree string */
+  static CoreNLPProtos.DependencyConverterRequest buildRequest(String ... trees) {
+    CoreNLPProtos.DependencyConverterRequest.Builder builder = CoreNLPProtos.DependencyConverterRequest.newBuilder();
+
+    for (String tree : trees) {
+      Tree t = Tree.valueOf(tree);
+      builder.addTrees(ProtobufAnnotationSerializer.toFlattenedTree(t));
+    }
+
+    return builder.build();
+  }
+
+  /**
+   * Check that the response has one conversion per expected graph and that
+   * each returned graph, rebuilt over the words of the returned tree, equals
+   * the expected graph given in toCompactString() format
+   */
+  static void checkResults(CoreNLPProtos.DependencyConverterResponse response, String ... expectedResults) {
+    Assert.assertEquals(expectedResults.length, response.getConversionsList().size());
+    for (int i = 0; i < expectedResults.length; ++i) {
+      CoreNLPProtos.DependencyConverterResponse.DependencyConversion conversion = response.getConversionsList().get(i);
+      CoreNLPProtos.DependencyGraph responseGraph = conversion.getGraph();
+      CoreNLPProtos.FlattenedParseTree responseTree = conversion.getTree();
+      Tree tree = ProtobufAnnotationSerializer.fromProto(responseTree);
+      List<CoreLabel> sentence = tree.taggedLabeledYield(false);
+
+      // parse the expected graph with sentence index i, matching what the converter assigns
+      SemanticGraph expected = SemanticGraph.valueOf(expectedResults[i], i);
+      SemanticGraph graph = ProtobufAnnotationSerializer.fromProto(responseGraph, sentence, null);
+      Assert.assertEquals(expected, graph);
+    }
+  }
+
+  /** Test a single Tree turning into Dependencies */
+  @Test
+  public void testOneTree() {
+    CoreNLPProtos.DependencyConverterRequest request = buildRequest("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ has) (NP (JJ nice) (NNS antennae)))))");
+    CoreNLPProtos.DependencyConverterResponse response = ProcessDependencyConverterRequest.processRequest(request);
+    checkResults(response, "[has/VBZ-1 nsubj>Jennifer/NNP-0 obj>[antennae/NNS-3 amod>nice/JJ-2]]");
+  }
+
+  /** Test two trees turning into Dependencies */
+  @Test
+  public void testTwoTrees() {
+    CoreNLPProtos.DependencyConverterRequest request = buildRequest("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ has) (NP (JJ nice) (NNS antennae)))))",
+                                                                    "(ROOT (S (NP (PRP She)) (VP (VBZ is) (ADJP (RB hella) (JJ basic)) (ADVP (RB though)))))");
+    CoreNLPProtos.DependencyConverterResponse response = ProcessDependencyConverterRequest.processRequest(request);
+    checkResults(response,
+                 "[has/VBZ-1 nsubj>Jennifer/NNP-0 obj>[antennae/NNS-3 amod>nice/JJ-2]]",
+                 "[basic/JJ-3 nsubj>She/PRP-0 cop>is/VBZ-1 advmod>hella/RB-2 advmod>though/RB-4]");
+  }
+
+}
+
+