Load serialized word2vec if not
gangeli authored and Stanford NLP committed Oct 18, 2015
1 parent affae43 commit 4c16135
Showing 16 changed files with 40 additions and 48 deletions.
@@ -1,6 +1,6 @@
package edu.stanford.nlp.pipeline;

-import edu.stanford.nlp.hcoref.CorefCoreAnnotations;
+import edu.stanford.nlp.dcoref.CorefCoreAnnotations;
import edu.stanford.nlp.ie.NumberNormalizer;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ling.CoreAnnotations;
@@ -463,7 +463,7 @@ public void testAllAnnotatorCombinations() {
if (!annotatorsToConsider.isEmpty()) { continue; } // continue if we couldn't add all the annotators

// Create pipeline
if (!annotators.contains("dcoref") && !annotators.contains("entitymentions")) { // TODO(gabor) eventually, don't ignore entitymentions!
if (!annotators.contains("hcoref") && !annotators.contains("entitymentions")) { // TODO(gabor) eventually, don't ignore this!
System.err.println(">>TESTING " + StringUtils.join(annotators, ","));
testAnnotators(StringUtils.join(annotators, ","));
}
2 changes: 1 addition & 1 deletion src/edu/stanford/nlp/dcoref/CorefCoreAnnotations.java
@@ -73,7 +73,7 @@ public Class<Integer> getType() {
* CoreLabel. Note that the list includes the CoreLabel that was
* annotated which creates a cycle.
*
-* @deprecated This was an original dcoref annotation. You should now use CorefChainAnnotation
+* @deprecated This was an original dcoref annotation. You should know use CorefChainAnnotation
*/
@Deprecated
public static class CorefClusterAnnotation implements CoreAnnotation<Set<CoreLabel>> {
4 changes: 2 additions & 2 deletions src/edu/stanford/nlp/hcoref/CorefProperties.java
@@ -195,8 +195,8 @@ public static String getPathSingletonPredictor(Properties props) {
return PropertiesUtils.getString(props, PATH_SINGLETON_PREDICTOR_PROP, "edu/stanford/nlp/models/dcoref/singleton.predictor.ser");
}
public static String getPathModel(Properties props, String sievename) {
-return props.getProperty(PATH_SERIALIZED_PROP) + File.separator +
-props.getProperty(PATH_MODEL_PROP.replace("SIEVENAME", sievename), "MISSING_MODEL_FOR_"+sievename);
+return new File(props.getProperty(PATH_SERIALIZED_PROP),
+props.getProperty(PATH_MODEL_PROP.replace("SIEVENAME", sievename), "MISSING_MODEL_FOR_"+sievename)).getAbsolutePath();
}
public static boolean debug(Properties props) {
return PropertiesUtils.getBool(props, DEBUG_PROP, false);
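The change above swaps manual string concatenation for File's two-argument constructor. A minimal sketch of why, using only the JDK; the class name and paths are illustrative, not from the repository:

import java.io.File;

public class PathJoinDemo {
  public static void main(String[] args) {
    String serializedDir = "models/hcoref/";  // note the trailing separator
    String modelName = "md-model.ser";

    // Manual concatenation blindly inserts File.separator, so a trailing
    // separator on the directory yields a doubled one in the result:
    System.out.println(serializedDir + File.separator + modelName);
    // -> models/hcoref//md-model.ser

    // new File(parent, child) normalizes the join, and getAbsolutePath()
    // resolves it against the working directory, as the new code does:
    System.out.println(new File(serializedDir, modelName).getAbsolutePath());
  }
}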
11 changes: 6 additions & 5 deletions src/edu/stanford/nlp/hcoref/data/Dictionaries.java
@@ -535,15 +535,16 @@ public void loadSemantics(Properties props) throws ClassNotFoundException, IOExc
if(CorefProperties.loadWordEmbedding(props)) {
System.err.println("LOAD: WordVectors");
String wordvectorFile = CorefProperties.getPathSerializedWordVectors(props);
-if(new File(wordvectorFile).exists()) {
-vectors = VectorMap.deserialize(wordvectorFile);
-dimVector = vectors.entrySet().iterator().next().getValue().length;
-} else {
-vectors = VectorMap.readWord2Vec(CorefProperties.getPathWord2Vec(props));
+String word2vecFile = CorefProperties.getPathWord2Vec(props);
+if(new File(word2vecFile).exists()) {
+vectors = VectorMap.readWord2Vec(word2vecFile);
+if (wordvectorFile != null && !wordvectorFile.startsWith("edu")) {
+vectors.serialize(wordvectorFile);
+}
+} else {
+vectors = VectorMap.deserialize(wordvectorFile);
}
+dimVector = vectors.entrySet().iterator().next().getValue().length;

// if(Boolean.parseBoolean(props.getProperty("useValDictionary"))) {
// System.err.println("LOAD: ValDictionary");
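This Dictionaries hunk is the change the commit message names: read the raw word2vec text file when it exists and cache a serialized copy for next time, otherwise fall back to the serialized file. A standalone sketch of that load-or-cache pattern, with a plain Map standing in for VectorMap; all names here are illustrative, not CoreNLP API:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.HashMap;
import java.util.Map;

public class WordVectorCache {

  /** Prefer the raw word2vec text file and cache a serialized copy; otherwise deserialize the cache. */
  @SuppressWarnings("unchecked")
  static Map<String, float[]> loadVectors(String word2vecFile, String serializedFile)
      throws IOException, ClassNotFoundException {
    Map<String, float[]> vectors;
    if (new File(word2vecFile).exists()) {
      vectors = readWord2Vec(word2vecFile);  // slow text parse
      // Mirror the guard in the real code: only cache to a writable
      // filesystem path, not to something that names a classpath resource.
      if (serializedFile != null && !serializedFile.startsWith("edu")) {
        try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(serializedFile))) {
          out.writeObject(vectors);
        }
      }
    } else {
      try (ObjectInputStream in = new ObjectInputStream(new FileInputStream(serializedFile))) {
        vectors = (HashMap<String, float[]>) in.readObject();  // fast cached load
      }
    }
    return vectors;
  }

  /** Minimal word2vec text reader: each line is "word v1 v2 ...". */
  static Map<String, float[]> readWord2Vec(String file) throws IOException {
    Map<String, float[]> map = new HashMap<>();
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
      for (String line; (line = reader.readLine()) != null; ) {
        String[] fields = line.split(" ");
        float[] vector = new float[fields.length - 1];
        for (int i = 1; i < fields.length; i++) {
          vector[i - 1] = Float.parseFloat(fields[i]);
        }
        map.put(fields[0], vector);
      }
    }
    return map;
  }
}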
@@ -125,11 +125,7 @@ public CoNLLDocumentReader(String filepath)
public CoNLLDocumentReader(String filepath, Options options)
{
// this.filepath = filepath;
-if (filepath != null && new File(filepath).exists()) {
-this.fileList = getFiles(filepath, options.filePattern);
-} else {
-this.fileList = Collections.EMPTY_LIST;
-}
+this.fileList = getFiles(filepath, options.filePattern);
this.options = options;
if (options.sortFiles) {
Collections.sort(this.fileList);
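The deleted guard quietly substituted an empty file list whenever the path was null or missing, which can make a mistyped corpus path look like an empty corpus. A sketch of the fail-fast behavior the simplified constructor leans toward; getFiles here is a stand-in for the reader's own helper:

import java.io.File;
import java.util.Arrays;
import java.util.List;

public class CorpusFiles {
  /** Stand-in for CoNLLDocumentReader.getFiles(filepath, filePattern). */
  static List<File> getFiles(String dir) {
    File[] files = new File(dir).listFiles();
    if (files == null) {
      // Surfacing a bad path immediately beats returning an empty list
      // and discovering much later that no documents were read.
      throw new IllegalArgumentException("Not a readable directory: " + dir);
    }
    return Arrays.asList(files);
  }
}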
@@ -35,7 +35,7 @@ public class DependencyCorefMentionFinder extends CorefMentionFinder {
public DependencyCorefMentionFinder(Properties props) throws ClassNotFoundException, IOException {
this.lang = CorefProperties.getLanguage(props);
mdClassifier = (CorefProperties.isMentionDetectionTraining(props))?
-null : IOUtils.readObjectFromURLOrClasspathOrFileSystem(CorefProperties.getPathModel(props, "md"));
+null : IOUtils.readObjectFromFile(CorefProperties.getPathModel(props, "md"));
}

public MentionDetectionClassifier mdClassifier = null;
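This hunk, and the matching ones in HybridCorefMentionFinder, MentionDetectionClassifier, Sieve, and DVParser below, swap classpath-aware model loading for plain file loading. A rough sketch of what the two IOUtils strategies do, written out in plain Java; the method bodies are illustrative, not CoreNLP's actual implementation:

import java.io.*;

public class ObjectLoaders {

  /** File system only, as in the readObjectFromFile calls the commit switches to. */
  @SuppressWarnings("unchecked")
  static <T> T readObjectFromFile(String path) throws IOException, ClassNotFoundException {
    try (ObjectInputStream in = new ObjectInputStream(
        new BufferedInputStream(new FileInputStream(path)))) {
      return (T) in.readObject();
    }
  }

  /** Classpath first, file system as a fallback, as the replaced calls allowed. */
  @SuppressWarnings("unchecked")
  static <T> T readObjectFromClasspathOrFile(String path) throws IOException, ClassNotFoundException {
    InputStream raw = ObjectLoaders.class.getClassLoader().getResourceAsStream(path);
    if (raw == null) {
      raw = new FileInputStream(path);  // not on the classpath; try the file system
    }
    try (ObjectInputStream in = new ObjectInputStream(new BufferedInputStream(raw))) {
      return (T) in.readObject();
    }
  }
}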
@@ -32,7 +32,7 @@ public HybridCorefMentionFinder(HeadFinder headFinder, Properties props) throws
this.headFinder = headFinder;
this.lang = CorefProperties.getLanguage(props);
mdClassifier = (CorefProperties.isMentionDetectionTraining(props))?
-null : IOUtils.readObjectFromURLOrClasspathOrFileSystem(CorefProperties.getPathModel(props, "md"));
+null : IOUtils.readObjectFromFile(CorefProperties.getPathModel(props, "md"));
}

@Override
@@ -81,7 +81,7 @@ public static Counter<String> extractFeatures(Mention p, Set<Mention> shares, Se

public static MentionDetectionClassifier loadMentionDetectionClassifier(String filename) throws ClassNotFoundException, IOException {
System.err.print("loading MentionDetectionClassifier ...");
-MentionDetectionClassifier mdc = IOUtils.readObjectFromURLOrClasspathOrFileSystem(filename);
+MentionDetectionClassifier mdc = IOUtils.readObjectFromFile(filename);
System.err.println("done");
return mdc;
}
2 changes: 1 addition & 1 deletion src/edu/stanford/nlp/hcoref/sieve/Sieve.java
@@ -111,7 +111,7 @@ public static Sieve loadSieve(Properties props, String sievename) throws Excepti
return sieve;

case RF:
-RFSieve rfsieve = IOUtils.readObjectFromURLOrClasspathOrFileSystem(CorefProperties.getPathModel(props, sievename));
+RFSieve rfsieve = IOUtils.readObjectFromFile(CorefProperties.getPathModel(props, sievename));
rfsieve.thresMerge = CorefProperties.getMergeThreshold(props, sievename);
System.err.println("Done.\nMerging threshold: "+rfsieve.thresMerge);
return rfsieve;
2 changes: 1 addition & 1 deletion src/edu/stanford/nlp/parser/dvparser/DVParser.java
@@ -371,7 +371,7 @@ public static DVParser loadModel(String filename, String[] args) {
System.err.println("Loading serialized model from " + filename);
DVParser dvparser;
try {
-dvparser = IOUtils.readObjectFromURLOrClasspathOrFileSystem(filename);
+dvparser = IOUtils.readObjectFromFile(filename);
dvparser.op.setOptions(args);
} catch (IOException e) {
throw new RuntimeIOException(e);
6 changes: 1 addition & 5 deletions src/edu/stanford/nlp/pipeline/Annotator.java
@@ -158,10 +158,6 @@ public String toString() {
add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT);
}}));
-put(STANFORD_REGEXNER, Collections.unmodifiableSet(new HashSet<Requirement>() {{
-add(TOKENIZE_REQUIREMENT);
-add(SSPLIT_REQUIREMENT);
-}}));
put(STANFORD_GENDER, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
@@ -196,7 +192,7 @@ public String toString() {
add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT);
add(NER_REQUIREMENT);
-add(DEPENDENCY_REQUIREMENT);
+add(PARSE_REQUIREMENT);
}}));
put(STANFORD_RELATION, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT);
30 changes: 13 additions & 17 deletions src/edu/stanford/nlp/pipeline/HybridCorefAnnotator.java
@@ -1,18 +1,26 @@
package edu.stanford.nlp.pipeline;

-import java.io.IOException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;

import edu.stanford.nlp.hcoref.CorefCoreAnnotations;
import edu.stanford.nlp.hcoref.CorefCoreAnnotations.CorefChainAnnotation;
import edu.stanford.nlp.hcoref.CorefSystem;
import edu.stanford.nlp.hcoref.data.CorefChain;
import edu.stanford.nlp.hcoref.data.CorefChain.CorefMention;
import edu.stanford.nlp.hcoref.data.Document;
-import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
-import edu.stanford.nlp.util.*;
+import edu.stanford.nlp.util.ArraySet;
+import edu.stanford.nlp.util.CoreMap;
+import edu.stanford.nlp.util.Generics;
+import edu.stanford.nlp.util.IntTuple;
+import edu.stanford.nlp.util.Pair;
+import edu.stanford.nlp.util.StringUtils;

public class HybridCorefAnnotator extends TextAnnotationCreator implements Annotator {

@@ -25,19 +33,7 @@ public class HybridCorefAnnotator extends TextAnnotationCreator implements Annot

public HybridCorefAnnotator(Properties props) {
try {
-// Load the default properties
-Properties corefProps = new Properties();
-try {
-corefProps.load(IOUtils.readerFromString("edu/stanford/nlp/hcoref/properties/coref-default-dep.properties"));
-} catch (IOException ignored) { }
-// Add passed properties
-Enumeration<Object> keys = props.keys();
-while (keys.hasMoreElements()) {
-String key = keys.nextElement().toString();
-corefProps.setProperty(key, props.getProperty(key));
-}
-// Create coref system
-corefSystem = new CorefSystem(corefProps);
+corefSystem = new CorefSystem(props);
OLD_FORMAT = Boolean.parseBoolean(props.getProperty("oldCorefFormat", "false"));
} catch (Exception e) {
System.err.println("ERROR: cannot create HybridCorefAnnotator!");
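The deleted block layered the caller's properties on top of a bundled defaults file before building the CorefSystem; the new constructor passes the caller's properties straight through. For reference, java.util.Properties supports that layering natively through its defaults constructor. A sketch, where the resource name is hypothetical:

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

public class LayeredProps {
  public static void main(String[] args) throws IOException {
    // Layer 1: bundled defaults loaded from the classpath (hypothetical resource)
    Properties defaults = new Properties();
    try (InputStream in = LayeredProps.class.getClassLoader()
        .getResourceAsStream("coref-default-dep.properties")) {
      if (in != null) defaults.load(in);
    }
    // Layer 2: caller-supplied values; lookups fall through to the defaults
    Properties props = new Properties(defaults);
    props.setProperty("oldCorefFormat", "false");
    System.out.println(props.getProperty("oldCorefFormat"));
  }
}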
2 changes: 1 addition & 1 deletion src/edu/stanford/nlp/pipeline/StanfordCoreNLP.java
@@ -662,7 +662,7 @@ private static void printRequiredProperties(PrintStream os) {
os.println("(if -props or -annotators is not passed in, default properties will be loaded via the classpath)");
os.println("\t\"props\" - path to file with configuration properties");
os.println("\t\"annotators\" - comma separated list of annotators");
os.println("\tThe following annotators are supported: cleanxml, tokenize, quote, ssplit, pos, lemma, ner, truecase, parse, hcoref, relation");
os.println("\tThe following annotators are supported: cleanxml, tokenize, quote, ssplit, pos, lemma, ner, truecase, parse, coref, dcoref, relation");

os.println();
os.println("\tIf annotator \"tokenize\" is defined:");
2 changes: 1 addition & 1 deletion src/edu/stanford/nlp/pipeline/StanfordCoreNLP.properties
@@ -1,4 +1,4 @@
-annotators = tokenize, ssplit, pos, lemma, ner, parse, hcoref
+annotators = tokenize, ssplit, pos, lemma, ner, parse, dcoref

# Some other annotators are also available for English and can be optionally loaded, e.g.:
# annotators = tokenize, ssplit, pos, lemma, truecase
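With the default annotator list restored to dcoref, a standard pipeline invocation looks like this; the sentence text is illustrative:

import java.util.Properties;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class PipelineDemo {
  public static void main(String[] args) {
    Properties props = new Properties();
    // The default annotator list this commit restores
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation annotation = new Annotation("Gabor wrote the patch. He committed it.");
    pipeline.annotate(annotation);  // coref chains land in CorefChainAnnotation
  }
}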
9 changes: 6 additions & 3 deletions src/edu/stanford/nlp/pipeline/StanfordCoreNLPServer.java
@@ -300,8 +300,13 @@ public void handle(HttpExchange httpExchange) throws IOException {
}
log("[" + httpExchange.getRemoteAddress() + "] API call");
} catch (Exception e) {
// Return error message.
e.printStackTrace();
respondError("Could not handle incoming annotation", httpExchange);
String response = e.getMessage();
httpExchange.getResponseHeaders().add("Content-Type", "text/plain");
httpExchange.sendResponseHeaders(HTTP_BAD_INPUT, response.length());
httpExchange.getResponseBody().write(response.getBytes());
httpExchange.close();
return;
}

@@ -326,10 +331,8 @@ public void handle(HttpExchange httpExchange) throws IOException {
httpExchange.getResponseBody().write(response);
httpExchange.close();
} catch (TimeoutException e) {
-e.printStackTrace();
respondError("CoreNLP request timed out", httpExchange);
} catch (Exception e) {
-e.printStackTrace();
// Return error message.
respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
}
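One caveat about the added error path above: sendResponseHeaders is given response.length(), which is the UTF-16 char count rather than the byte count, and e.getMessage() can be null. A hedged sketch of a byte-safe variant, as a standalone helper rather than the server's actual respondError:

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import com.sun.net.httpserver.HttpExchange;

class ErrorResponse {
  static void respondBadInput(HttpExchange exchange, Exception e) throws IOException {
    String message = (e.getMessage() != null) ? e.getMessage() : e.getClass().getName();
    byte[] body = message.getBytes(StandardCharsets.UTF_8);
    exchange.getResponseHeaders().add("Content-Type", "text/plain");
    exchange.sendResponseHeaders(400, body.length);  // content length must be in bytes
    exchange.getResponseBody().write(body);
    exchange.close();
  }
}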
2 changes: 1 addition & 1 deletion src/edu/stanford/nlp/simple/Document.java
@@ -509,7 +509,7 @@ public Map<Integer, CorefChain> coref(Properties props) {
synchronized (this.impl) {
if (impl.getCorefChainCount() == 0) {
// Run prerequisites
-this.runNER(props).runDepparse(props); // hcoref needs dependencies only
+this.runNER(props).runParse(props);
// Run coref
Annotator coref = props == EMPTY_PROPS ? defaultCoref.get() : AnnotatorFactories.coref(props, backend).create();
Annotation ann = asAnnotation();
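Since dcoref works from constituency parses, Document.coref() now runs runParse rather than runDepparse as its prerequisite. A usage sketch of the simple-API path this code serves, assuming the no-argument coref() overload that applies default properties; the example text is illustrative:

import edu.stanford.nlp.simple.Document;

public class SimpleCorefDemo {
  public static void main(String[] args) {
    Document doc = new Document("Gabor wrote the patch. He committed it.");
    // coref() triggers its own prerequisites (NER and, after this change, parse)
    doc.coref().forEach((id, chain) -> System.out.println(id + ": " + chain));
  }
}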
