Merge branch 'master' of jamie:/u/nlp/git/javanlp

stanfordnlp · Dec 4, 2015 · 52ca828 · 52ca828
1 parent aa82c49
commit 52ca828
Show file tree

Hide file tree

Showing 15 changed files with 479 additions and 950 deletions.
diff --git a/doc/corenlp/pom-full.xml b/doc/corenlp/pom-full.xml
@@ -43,7 +43,7 @@
     <dependency>
       <groupId>joda-time</groupId>
       <artifactId>joda-time</artifactId>
-      <version>2.1</version>
+      <version>2.9</version>
     </dependency>
     <dependency>
       <groupId>de.jollyday</groupId>

diff --git a/itest/src/edu/stanford/nlp/pipeline/AnnotationOutputterITest.java b/itest/src/edu/stanford/nlp/pipeline/AnnotationOutputterITest.java
@@ -24,13 +24,13 @@ public void testSimpleSentenceCoNLL() throws IOException {
             "2\tcat\tcat\tNN\tO\t4\tnsubj\n" +
             "3\tis\tbe\tVBZ\tO\t4\tcop\n" +
             "4\tfat\tfat\tJJ\tO\t0\tROOT\n" +
-            "5\t.\t.\t.\tO\t_\t_\n" +
+            "5\t.\t.\t.\tO\t4\tpunct\n" +
             "\n" +
             "1\tThe\tthe\tDT\tO\t2\tdet\n" +
             "2\tdog\tdog\tNN\tO\t4\tnsubj\n" +
             "3\tis\tbe\tVBZ\tO\t4\tcop\n" +
             "4\tlazy\tlazy\tJJ\tO\t0\tROOT\n" +
-            "5\t.\t.\t.\tO\t_\t_\n" +
+            "5\t.\t.\t.\tO\t4\tpunct\n" +
             "\n";
     assertEquals(expected, actual);
   }

diff --git a/itest/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializerSlowITest.java b/itest/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializerSlowITest.java
@@ -304,7 +304,7 @@ public void testSaveSize() throws IOException {
     assertNotNull(compressedProto);
 
     // Check size
-    assertTrue("" + compressedProto.length, compressedProto.length < 330000);
+    assertTrue("" + compressedProto.length, compressedProto.length < 340000);
     assertTrue("" + uncompressedProto.length, uncompressedProto.length < 1700000);
   }
 

diff --git a/src/edu/stanford/nlp/classify/WeightedRVFDataset.java b/src/edu/stanford/nlp/classify/WeightedRVFDataset.java
@@ -11,6 +11,8 @@
  */
 public class WeightedRVFDataset<L, F> extends RVFDataset<L, F> {
 
+  private static final long serialVersionUID = 1L;
+
   float[] weights = new float[16];
 
   public WeightedRVFDataset() {

diff --git a/src/edu/stanford/nlp/hcoref/README.txt b/src/edu/stanford/nlp/hcoref/README.txt
@@ -5,7 +5,7 @@ which can be used in pipeline by adding 'hcoref' annotator.
 It includes all deterministic sieves used in dcoref system
 (however, some behaviors might be changed to make the system simpler),
 machine learning sieves based on random forest,
-and Oracle sieves for the system analysis.
+and oracle sieves for the system analysis.
 
 System training requires hcoref.train package in research,
 weka 3.7.12 (http://www.cs.waikato.ac.nz/ml/weka/documentation.html),
@@ -53,7 +53,7 @@ Here is an example code to use the system (See pipeline.HybridCorefAnnotator).
   import edu.stanford.nlp.hcoref.data.CorefChain;
   import edu.stanford.nlp.hcoref.data.CorefChain.CorefMention;
   import edu.stanford.nlp.hcoref.data.Document;
-  
+
   CorefSystem corefSystem = new CorefSystem(props);
   Document corefDoc = corefSystem.docMaker.makeDocument(annotation);
   Map<Integer, CorefChain> result = corefSystem.coref(corefDoc);

diff --git a/src/edu/stanford/nlp/pipeline/CoreNLP.proto b/src/edu/stanford/nlp/pipeline/CoreNLP.proto
@@ -264,6 +264,7 @@ message Mention {
   optional IndexedWord headIndexedWord = 30;
   optional IndexedWord   dependingVerb = 31;
   optional IndexedWord       headWord  = 32;
+  optional SpeakerInfo    speakerInfo  = 33;
 
   repeated IndexedWord         sentenceWords = 50;
   repeated IndexedWord         originalSpan = 51;
@@ -293,16 +294,8 @@ message IndexedWord {
 //
 
 message SpeakerInfo {
-  optional string speakerId            = 1;
+  optional string speakerName          = 1;
-  optional string speakerName          = 2;
+  repeated int32 mentions    = 2;
-  optional string speakerDesc          = 3;
-  optional bool speakerIdIsNumber      = 4;
-  optional bool speakerIdIsAutoDetermined = 5;
-
-
-  repeated string speakerNameStrings   = 6;
-  repeated int32 mentions    = 7;
-
 }
 
 //