Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
manning authored and Stanford NLP committed Apr 23, 2016
1 parent c74bfa1 commit 2937adc
Show file tree
Hide file tree
Showing 14 changed files with 382 additions and 128 deletions.
5 changes: 5 additions & 0 deletions build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@
<exclude name="**/*.java"/>
</fileset>
</copy>
<copy todir="${build.path}/edu/stanford/nlp/pipeline">
<fileset dir="${source.path}/edu/stanford/nlp/pipeline">
<exclude name="**/*.java"/>
</fileset>
</copy>
</target>

<target name="test" depends="classpath,compile"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
* <p>
* For usage at test time, load a model from
* {@link ClauseSplitter#load(String)}, and then take the top clauses of a given tree
* with {@link ClauseSplitterSearchProblem#topClauses(double)}, yielding a list of
* with {@link ClauseSplitterSearchProblem#topClauses(double, int)}, yielding a list of
* {@link edu.stanford.nlp.naturalli.SentenceFragment}s.
* </p>
* <pre>
Expand Down Expand Up @@ -485,10 +485,13 @@ private CoreLabel mockNode(CoreLabel toCopy, String word, String POS) {
/**
* Get the top few clauses from this searcher, cutting off at the given minimum
* probability.
*
* @param thresholdProbability The threshold under which to stop returning clauses. This should be between 0 and 1.
* @param maxClauses A hard limit on the number of clauses to return.
*
* @return The resulting {@link edu.stanford.nlp.naturalli.SentenceFragment} objects, representing the top clauses of the sentence.
*/
public List<SentenceFragment> topClauses(double thresholdProbability) {
public List<SentenceFragment> topClauses(double thresholdProbability, int maxClauses) {
List<SentenceFragment> results = new ArrayList<>();
search(triple -> {
assert triple.first <= 0.0;
Expand All @@ -510,7 +513,7 @@ public List<SentenceFragment> topClauses(double thresholdProbability) {

/**
* Search, using the default weights / featurizer. This is the most common entry method for the raw search,
* though {@link ClauseSplitterSearchProblem#topClauses(double)} may be a more convenient method for
* though {@link ClauseSplitterSearchProblem#topClauses(double, int)} may be a more convenient method for
* an end user.
*
* @param candidateFragments The callback function for results. The return value defines whether to continue searching.
Expand Down
2 changes: 1 addition & 1 deletion src/edu/stanford/nlp/naturalli/OpenIE.java
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ public OpenIE(Properties props) {
@SuppressWarnings("unchecked")
public List<SentenceFragment> clausesInSentence(SemanticGraph tree, boolean assumedTruth) {
if (clauseSplitter.isPresent()) {
return clauseSplitter.get().apply(tree, assumedTruth).topClauses(splitterThreshold);
return clauseSplitter.get().apply(tree, assumedTruth).topClauses(splitterThreshold, 32);
} else {
return Collections.emptyList();
}
Expand Down
3 changes: 3 additions & 0 deletions src/edu/stanford/nlp/pipeline/KBPAnnotator.java
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,9 @@ public void annotate(Annotation annotation) {
if (subjI == objI) {
continue;
}
if (Thread.interrupted()) {
throw new RuntimeInterruptedException();
}
CoreMap obj = candidates.get(objI);
int objBegin = obj.get(CoreAnnotations.TokensAnnotation.class).get(0).index() - 1;
int objEnd = obj.get(CoreAnnotations.TokensAnnotation.class).get(obj.get(CoreAnnotations.TokensAnnotation.class).size() - 1).index();
Expand Down
24 changes: 24 additions & 0 deletions src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,9 @@ public static CoreNLPProtos.Quote toProtoQuote(CoreMap quote) {
* @return A CoreLabel, missing the fields that are not stored in the CoreLabel protobuf.
*/
public CoreLabel fromProto(CoreNLPProtos.Token proto) {
if (Thread.interrupted()) {
throw new RuntimeInterruptedException();
}
CoreLabel word = new CoreLabel();
// Required fields
word.setWord(proto.getWord());
Expand Down Expand Up @@ -1042,6 +1045,9 @@ public CoreLabel fromProto(CoreNLPProtos.Token proto) {
* @return A CoreMap representing the sentence.
*/
public CoreMap fromProto(CoreNLPProtos.Sentence proto) {
if (Thread.interrupted()) {
throw new RuntimeInterruptedException();
}
CoreMap lossySentence = fromProtoNoTokens(proto);
// Add tokens -- missing by default as they're populated as sublists of the document tokens
List<CoreLabel> tokens = proto.getTokenList().stream().map(this::fromProto).collect(Collectors.toList());
Expand Down Expand Up @@ -1088,6 +1094,9 @@ public CoreMap fromProto(CoreNLPProtos.Sentence proto) {
* @return A CoreMap, representing a sentence as stored in the protocol buffer (and therefore missing some fields)
*/
protected CoreMap fromProtoNoTokens(CoreNLPProtos.Sentence proto) {
if (Thread.interrupted()) {
throw new RuntimeInterruptedException();
}
CoreMap sentence = new ArrayCoreMap();
// Required fields
sentence.set(TokenBeginAnnotation.class, proto.getTokenOffsetBegin());
Expand Down Expand Up @@ -1182,6 +1191,9 @@ protected void loadSentenceMentions(CoreNLPProtos.Sentence proto, CoreMap senten
* @return An Annotation corresponding to the read protobuf.
*/
public Annotation fromProto(CoreNLPProtos.Document proto) {
if (Thread.interrupted()) {
throw new RuntimeInterruptedException();
}
// Set text
Annotation ann = new Annotation(proto.getText());

Expand Down Expand Up @@ -1410,6 +1422,9 @@ public Annotation fromProto(CoreNLPProtos.Document proto) {
* @return A Tree object corresponding to the saved tree. This will always be a {@link LabeledScoredTreeNode}.
*/
public Tree fromProto(CoreNLPProtos.ParseTree proto) {
if (Thread.interrupted()) {
throw new RuntimeInterruptedException();
}
LabeledScoredTreeNode node = new LabeledScoredTreeNode();
// Set label
if (proto.hasValue()) {
Expand Down Expand Up @@ -1592,6 +1607,9 @@ public static SemanticGraph fromProto(CoreNLPProtos.DependencyGraph proto, List<
 * @return A relation triple as a Java object, corresponding to the serialized proto.
*/
public static RelationTriple fromProto(CoreNLPProtos.RelationTriple proto, List<CoreLabel> sentence, String docid) {
if (Thread.interrupted()) {
throw new RuntimeInterruptedException();
}
// Get the spans for the extraction
List<CoreLabel> subject = proto.getSubjectTokensList().stream().map(sentence::get).collect(Collectors.toList());
List<CoreLabel> relation;
Expand Down Expand Up @@ -1631,6 +1649,9 @@ public static RelationTriple fromProto(CoreNLPProtos.RelationTriple proto, List<
* @return A {@link SentenceFragment} object corresponding to the saved proto.
*/
public static SentenceFragment fromProto(CoreNLPProtos.SentenceFragment fragment, SemanticGraph tree) {
if (Thread.interrupted()) {
throw new RuntimeInterruptedException();
}
SemanticGraph fragmentTree = new SemanticGraph(tree);
// Set the new root
if (fragment.hasRoot()) {
Expand Down Expand Up @@ -1708,6 +1729,9 @@ private CorefChain fromProto(CoreNLPProtos.CorefChain proto, Annotation partialD
Map<IntPair, Set<CorefChain.CorefMention>> mentions = new HashMap<>();
CorefChain.CorefMention representative = null;
for (int i = 0; i < proto.getMentionCount(); ++i) {
if (Thread.interrupted()) {
throw new RuntimeInterruptedException();
}
CoreNLPProtos.CorefChain.CorefMention mentionProto = proto.getMention(i);
// Create mention
StringBuilder mentionSpan = new StringBuilder();
Expand Down
4 changes: 4 additions & 0 deletions src/edu/stanford/nlp/pipeline/SentenceAnnotator.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.RuntimeInterruptedException;
import edu.stanford.nlp.util.concurrent.InterruptibleMulticoreWrapper;
import edu.stanford.nlp.util.concurrent.ThreadsafeProcessor;

Expand Down Expand Up @@ -95,6 +96,9 @@ public void annotate(Annotation annotation) {
}
} else {
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
if (Thread.interrupted()) {
throw new RuntimeInterruptedException();
}
doOneSentence(annotation, sentence);
}
}
Expand Down
26 changes: 17 additions & 9 deletions src/edu/stanford/nlp/pipeline/StanfordCoreNLPClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,13 @@ public class StanfordCoreNLPClient extends AnnotationPipeline {
*/
private static class Backend {
/** The protocol to connect to the server with. */
public final String protocol = "http";
public final String protocol;
/** The hostname of the server running the CoreNLP annotators */
public final String host;
/** The port of the server running the CoreNLP annotators */
public final int port;
public Backend(String host, int port) {
public Backend(String protocol, String host, int port) {
this.protocol = protocol;
this.host = host;
this.port = port;
}
Expand Down Expand Up @@ -268,7 +269,10 @@ private StanfordCoreNLPClient(Properties properties, List<Backend> backends) {
*/
@SuppressWarnings("unused")
public StanfordCoreNLPClient(Properties properties, String host, int port) {
this(properties, Collections.singletonList(new Backend(host, port)));
this(properties, Collections.singletonList(
new Backend(host.startsWith("https://") ? "https" : "http",
host.startsWith("http://") ? host.substring("http://".length()) : (host.startsWith("https://") ? host.substring("https://".length()) : host),
port)));
}

/**
Expand All @@ -280,7 +284,9 @@ public StanfordCoreNLPClient(Properties properties, String host, int port) {
public StanfordCoreNLPClient(Properties properties, String host, int port, int threads) {
this(properties, new ArrayList<Backend>() {{
for (int i = 0; i < threads; ++i) {
add(new Backend(host, port));
add(new Backend(host.startsWith("https://") ? "https" : "http",
host.startsWith("http://") ? host.substring("http://".length()) : (host.startsWith("https://") ? host.substring("https://".length()) : host),
port));
}
}});
}
Expand Down Expand Up @@ -369,6 +375,7 @@ public void run() {
// 2.3 Set some protocol-dependent properties
switch (backend.protocol) {
case "http":
case "https":
((HttpURLConnection) connection).setRequestMethod("POST");
break;
default:
Expand All @@ -393,10 +400,9 @@ public void run() {

// 6. Call the callback
callback.accept(annotation);
} catch (IOException e) {
throw new RuntimeIOException("Could not connect to server: " + backend.host + ":" + backend.port, e);
} catch (ClassNotFoundException e) {
} catch (IOException | ClassNotFoundException e) {
e.printStackTrace();
callback.accept(null);
}
}
}.start());
Expand Down Expand Up @@ -568,9 +574,11 @@ public static void main(String[] args) throws IOException, ClassNotFoundExceptio
if (spec.contains(":")) {
String host = spec.substring(0, spec.indexOf(":"));
int port = Integer.parseInt(spec.substring(spec.indexOf(":") + 1));
backends.add(new Backend(host, port));
backends.add(new Backend(host.startsWith("https://") ? "https" : "http",
host.startsWith("http://") ? host.substring("http://".length()) : (host.startsWith("https://") ? host.substring("https://".length()) : host),
port));
} else {
backends.add(new Backend(spec, 80));
backends.add(new Backend("http", spec, 80));
}
}

Expand Down
Loading

0 comments on commit 2937adc

Please sign in to comment.